stat.py 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # TODO: define statistical functions of a tensor
  15. import paddle
  16. from paddle import _C_ops
  17. from paddle.framework import (
  18. in_dynamic_mode,
  19. in_dynamic_or_pir_mode,
  20. )
  21. from ..base.data_feeder import check_type, check_variable_and_dtype
  22. from ..common_ops_import import Variable
  23. from ..framework import LayerHelper, core
  24. from .math import _get_reduce_axis_with_tensor
  25. from .search import where
  26. __all__ = []
  27. def mean(x, axis=None, keepdim=False, name=None):
  28. """
  29. Computes the mean of the input tensor's elements along ``axis``.
  30. Args:
  31. x (Tensor): The input Tensor with data type float32, float64.
  32. axis (int|list|tuple, optional): The axis along which to perform mean
  33. calculations. ``axis`` should be int, list(int) or tuple(int). If
  34. ``axis`` is a list/tuple of dimension(s), mean is calculated along
  35. all element(s) of ``axis`` . ``axis`` or element(s) of ``axis``
  36. should be in range [-D, D), where D is the dimensions of ``x`` . If
  37. ``axis`` or element(s) of ``axis`` is less than 0, it works the
  38. same way as :math:`axis + D` . If ``axis`` is None, mean is
  39. calculated over all elements of ``x``. Default is None.
  40. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  41. in the output Tensor. If ``keepdim`` is True, the dimensions of
  42. the output Tensor is the same as ``x`` except in the reduced
  43. dimensions(it is of size 1 in this case). Otherwise, the shape of
  44. the output Tensor is squeezed in ``axis`` . Default is False.
  45. name (str, optional): Name for the operation (optional, default is None).
  46. For more information, please refer to :ref:`api_guide_Name`.
  47. Returns:
  48. Tensor, results of average along ``axis`` of ``x``, with the same data
  49. type as ``x``.
  50. Examples:
  51. .. code-block:: python
  52. >>> import paddle
  53. >>> x = paddle.to_tensor([[[1., 2., 3., 4.],
  54. ... [5., 6., 7., 8.],
  55. ... [9., 10., 11., 12.]],
  56. ... [[13., 14., 15., 16.],
  57. ... [17., 18., 19., 20.],
  58. ... [21., 22., 23., 24.]]])
  59. >>> out1 = paddle.mean(x)
  60. >>> print(out1.numpy())
  61. 12.5
  62. >>> out2 = paddle.mean(x, axis=-1)
  63. >>> print(out2.numpy())
  64. [[ 2.5 6.5 10.5]
  65. [14.5 18.5 22.5]]
  66. >>> out3 = paddle.mean(x, axis=-1, keepdim=True)
  67. >>> print(out3.numpy())
  68. [[[ 2.5]
  69. [ 6.5]
  70. [10.5]]
  71. [[14.5]
  72. [18.5]
  73. [22.5]]]
  74. >>> out4 = paddle.mean(x, axis=[0, 2])
  75. >>> print(out4.numpy())
  76. [ 8.5 12.5 16.5]
  77. """
  78. if in_dynamic_or_pir_mode():
  79. return _C_ops.mean(x, axis, keepdim)
  80. else:
  81. reduce_all, axis = _get_reduce_axis_with_tensor(axis, x)
  82. check_variable_and_dtype(
  83. x,
  84. 'x/input',
  85. ['uint16', "int32", 'float16', 'float32', 'float64'],
  86. 'mean/reduce_mean',
  87. )
  88. check_type(
  89. axis, 'axis/dim', (int, list, tuple, Variable), 'mean/reduce_mean'
  90. )
  91. if isinstance(axis, (list, tuple)):
  92. for item in axis:
  93. check_type(
  94. item,
  95. 'elements of axis/dim',
  96. (int, Variable),
  97. 'mean/reduce_mean',
  98. )
  99. helper = LayerHelper('mean', **locals())
  100. attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}
  101. out = helper.create_variable_for_type_inference(x.dtype)
  102. helper.append_op(
  103. type='reduce_mean',
  104. inputs={'X': x},
  105. outputs={'Out': out},
  106. attrs=attrs,
  107. )
  108. return out
  109. def var(x, axis=None, unbiased=True, keepdim=False, name=None):
  110. """
  111. Computes the variance of ``x`` along ``axis`` .
  112. Args:
  113. x (Tensor): The input Tensor with data type float16, float32, float64.
  114. axis (int|list|tuple, optional): The axis along which to perform variance calculations. ``axis`` should be int, list(int) or tuple(int).
  115. - If ``axis`` is a list/tuple of dimension(s), variance is calculated along all element(s) of ``axis`` . ``axis`` or element(s) of ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  116. - If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` .
  117. - If ``axis`` is None, variance is calculated over all elements of ``x``. Default is None.
  118. unbiased (bool, optional): Whether to use the unbiased estimation. If ``unbiased`` is True, the divisor used in the computation is :math:`N - 1`, where :math:`N` represents the number of elements along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
  119. keep_dim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the input unless keep_dim is true. Default is False.
  120. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
  121. Returns:
  122. Tensor, results of variance along ``axis`` of ``x``, with the same data type as ``x``.
  123. Examples:
  124. .. code-block:: python
  125. >>> import paddle
  126. >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
  127. >>> out1 = paddle.var(x)
  128. >>> print(out1.numpy())
  129. 2.6666667
  130. >>> out2 = paddle.var(x, axis=1)
  131. >>> print(out2.numpy())
  132. [1. 4.3333335]
  133. """
  134. if not in_dynamic_mode():
  135. check_variable_and_dtype(
  136. x, 'x', ['float16', 'float32', 'float64'], 'var'
  137. )
  138. u = mean(x, axis, True, name)
  139. out = paddle.sum(paddle.pow((x - u), 2), axis, keepdim=keepdim, name=name)
  140. dtype = x.dtype
  141. n = paddle.cast(paddle.numel(x), "int64") / paddle.cast(
  142. paddle.numel(out), "int64"
  143. )
  144. n = n.astype(dtype)
  145. if unbiased:
  146. one_const = paddle.ones([], x.dtype)
  147. n = where(n > one_const, n - 1.0, one_const)
  148. n.stop_gradient = True
  149. out /= n
  150. return out
  151. def std(x, axis=None, unbiased=True, keepdim=False, name=None):
  152. """
  153. Computes the standard-deviation of ``x`` along ``axis`` .
  154. Args:
  155. x (Tensor): The input Tensor with data type float16, float32, float64.
  156. axis (int|list|tuple, optional): The axis along which to perform
  157. standard-deviation calculations. ``axis`` should be int, list(int)
  158. or tuple(int). If ``axis`` is a list/tuple of dimension(s),
  159. standard-deviation is calculated along all element(s) of ``axis`` .
  160. ``axis`` or element(s) of ``axis`` should be in range [-D, D),
  161. where D is the dimensions of ``x`` . If ``axis`` or element(s) of
  162. ``axis`` is less than 0, it works the same way as :math:`axis + D` .
  163. If ``axis`` is None, standard-deviation is calculated over all
  164. elements of ``x``. Default is None.
  165. unbiased (bool, optional): Whether to use the unbiased estimation. If
  166. ``unbiased`` is True, the standard-deviation is calculated via the
  167. unbiased estimator. If ``unbiased`` is True, the divisor used in
  168. the computation is :math:`N - 1`, where :math:`N` represents the
  169. number of elements along ``axis`` , otherwise the divisor is
  170. :math:`N`. Default is True.
  171. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  172. in the output Tensor. If ``keepdim`` is True, the dimensions of
  173. the output Tensor is the same as ``x`` except in the reduced
  174. dimensions(it is of size 1 in this case). Otherwise, the shape of
  175. the output Tensor is squeezed in ``axis`` . Default is False.
  176. name (str, optional): Name for the operation (optional, default is None).
  177. For more information, please refer to :ref:`api_guide_Name`.
  178. Returns:
  179. Tensor, results of standard-deviation along ``axis`` of ``x``, with the
  180. same data type as ``x``.
  181. Examples:
  182. .. code-block:: python
  183. >>> import paddle
  184. >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
  185. >>> out1 = paddle.std(x)
  186. >>> print(out1.numpy())
  187. 1.6329932
  188. >>> out2 = paddle.std(x, unbiased=False)
  189. >>> print(out2.numpy())
  190. 1.490712
  191. >>> out3 = paddle.std(x, axis=1)
  192. >>> print(out3.numpy())
  193. [1. 2.081666]
  194. """
  195. if not in_dynamic_or_pir_mode():
  196. check_variable_and_dtype(
  197. x, 'x', ['float16', 'float32', 'float64'], 'std'
  198. )
  199. out = var(**locals())
  200. return paddle.sqrt(out)
  201. def numel(x, name=None):
  202. """
  203. Returns the number of elements for a tensor, which is a 0-D int64 Tensor with shape [].
  204. Args:
  205. x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64, complex64, complex128.
  206. name (str, optional): Name for the operation (optional, default is None).
  207. For more information, please refer to :ref:`api_guide_Name`.
  208. Returns:
  209. Tensor: The number of elements for the input Tensor, whose shape is [].
  210. Examples:
  211. .. code-block:: python
  212. >>> import paddle
  213. >>> x = paddle.full(shape=[4, 5, 7], fill_value=0, dtype='int32')
  214. >>> numel = paddle.numel(x)
  215. >>> print(numel.numpy())
  216. 140
  217. """
  218. if in_dynamic_or_pir_mode():
  219. return _C_ops.numel(x)
  220. else:
  221. if not isinstance(x, Variable):
  222. raise TypeError("x must be a Tensor in numel")
  223. helper = LayerHelper('numel', **locals())
  224. out = helper.create_variable_for_type_inference(
  225. dtype=core.VarDesc.VarType.INT64
  226. )
  227. helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out})
  228. return out
  229. def nanmedian(x, axis=None, keepdim=False, mode='avg', name=None):
  230. r"""
  231. Compute the median along the specified axis, while ignoring NaNs.
  232. If the valid count of elements is a even number,
  233. the average value of both elements in the middle is calculated as the median.
  234. Args:
  235. x (Tensor): The input Tensor, it's data type can be int32, int64, float16, bfloat16, float32, float64.
  236. axis (None|int|list|tuple, optional):
  237. The axis along which to perform median calculations ``axis`` should be int or list of int.
  238. ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  239. If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
  240. If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
  241. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  242. in the output Tensor. If ``keepdim`` is True, the dimensions of
  243. the output Tensor is the same as ``x`` except in the reduced
  244. dimensions(it is of size 1 in this case). Otherwise, the shape of
  245. the output Tensor is squeezed in ``axis`` . Default is False.
  246. mode (str, optional): Whether to use mean or min operation to calculate
  247. the nanmedian values when the input tensor has an even number of non-NaN elements
  248. along the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
  249. name (str, optional): Name for the operation (optional, default is None).
  250. For more information, please refer to :ref:`api_guide_Name`.
  251. Returns:
  252. Tensor or tuple of Tensor. If ``mode`` == 'min' and ``axis`` is int, the result
  253. will be a tuple of two tensors (nanmedian value and nanmedian index). Otherwise,
  254. only nanmedian value will be returned.
  255. Examples:
  256. .. code-block:: python
  257. >>> import paddle
  258. >>> x = paddle.to_tensor([[float('nan'), 2. , 3. ], [0. , 1. , 2. ]])
  259. >>> y1 = x.nanmedian()
  260. >>> print(y1.numpy())
  261. 2.0
  262. >>> y2 = x.nanmedian(0)
  263. >>> print(y2.numpy())
  264. [0. 1.5 2.5]
  265. >>> y3 = x.nanmedian(0, keepdim=True)
  266. >>> print(y3.numpy())
  267. [[0. 1.5 2.5]]
  268. >>> y4 = x.nanmedian((0, 1))
  269. >>> print(y4.numpy())
  270. 2.0
  271. >>> y5 = x.nanmedian(mode='min')
  272. >>> print(y5.numpy())
  273. 2.0
  274. >>> y6, y6_index = x.nanmedian(0, mode='min')
  275. >>> print(y6.numpy())
  276. [0. 1. 2.]
  277. >>> print(y6_index.numpy())
  278. [1 1 1]
  279. >>> y7, y7_index = x.nanmedian(1, mode='min')
  280. >>> print(y7.numpy())
  281. [2. 1.]
  282. >>> print(y7_index.numpy())
  283. [1 1]
  284. >>> y8 = x.nanmedian((0,1), mode='min')
  285. >>> print(y8.numpy())
  286. 2.0
  287. """
  288. if not isinstance(x, (Variable, paddle.pir.Value)):
  289. raise TypeError("In median, the input x should be a Tensor.")
  290. if isinstance(axis, (list, tuple)) and len(axis) == 0:
  291. raise ValueError("Axis list should not be empty.")
  292. if mode not in ('avg', 'min'):
  293. raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")
  294. need_index = (axis is not None) and (not isinstance(axis, (list, tuple)))
  295. if axis is None:
  296. axis = []
  297. elif isinstance(axis, tuple):
  298. axis = list(axis)
  299. elif isinstance(axis, int):
  300. axis = [axis]
  301. if in_dynamic_or_pir_mode():
  302. out, indices = _C_ops.nanmedian(x, axis, keepdim, mode)
  303. indices.stop_gradient = True
  304. else:
  305. check_variable_and_dtype(
  306. x,
  307. 'X',
  308. ['int32', 'int64', 'float16', 'float32', 'float64', 'uint16'],
  309. 'nanmedian',
  310. )
  311. helper = LayerHelper('nanmedian', **locals())
  312. attrs = {'axis': axis, 'keepdim': keepdim, 'mode': mode}
  313. out = helper.create_variable_for_type_inference(x.dtype)
  314. indices = helper.create_variable_for_type_inference(paddle.int64)
  315. helper.append_op(
  316. type='nanmedian',
  317. inputs={'X': x},
  318. outputs={'Out': out, 'MedianIndex': indices},
  319. attrs=attrs,
  320. )
  321. indices.stop_gradient = True
  322. if mode == 'min' and need_index:
  323. return out, indices
  324. else:
  325. return out
  326. def median(x, axis=None, keepdim=False, mode='avg', name=None):
  327. """
  328. Compute the median along the specified axis.
  329. Args:
  330. x (Tensor): The input Tensor, it's data type can be float16, float32, float64, int32, int64.
  331. axis (int, optional): The axis along which to perform median calculations ``axis`` should be int.
  332. ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  333. If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
  334. If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
  335. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  336. in the output Tensor. If ``keepdim`` is True, the dimensions of
  337. the output Tensor is the same as ``x`` except in the reduced
  338. dimensions(it is of size 1 in this case). Otherwise, the shape of
  339. the output Tensor is squeezed in ``axis`` . Default is False.
  340. mode (str, optional): Whether to use mean or min operation to calculate
  341. the median values when the input tensor has an even number of elements
  342. in the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
  343. name (str, optional): Name for the operation (optional, default is None).
  344. For more information, please refer to :ref:`api_guide_Name`.
  345. Returns:
  346. Tensor or tuple of Tensor.
  347. If ``mode`` == 'avg', the result will be the tensor of median values;
  348. If ``mode`` == 'min' and ``axis`` is None, the result will be the tensor of median values;
  349. If ``mode`` == 'min' and ``axis`` is not None, the result will be a tuple of two tensors
  350. containing median values and their indices.
  351. When ``mode`` == 'avg', if data type of ``x`` is float64, data type of median values will be float64,
  352. otherwise data type of median values will be float32.
  353. When ``mode`` == 'min', the data type of median values will be the same as ``x``. The data type of
  354. indices will be int64.
  355. Examples:
  356. .. code-block:: python
  357. >>> import paddle
  358. >>> import numpy as np
  359. >>> x = paddle.arange(12).reshape([3, 4])
  360. >>> print(x)
  361. Tensor(shape=[3, 4], dtype=int64, place=Place(cpu), stop_gradient=True,
  362. [[0 , 1 , 2 , 3 ],
  363. [4 , 5 , 6 , 7 ],
  364. [8 , 9 , 10, 11]])
  365. >>> y1 = paddle.median(x)
  366. >>> print(y1)
  367. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  368. 5.50000000)
  369. >>> y2 = paddle.median(x, axis=0)
  370. >>> print(y2)
  371. Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
  372. [4., 5., 6., 7.])
  373. >>> y3 = paddle.median(x, axis=1)
  374. >>> print(y3)
  375. Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
  376. [1.50000000, 5.50000000, 9.50000000])
  377. >>> y4 = paddle.median(x, axis=0, keepdim=True)
  378. >>> print(y4)
  379. Tensor(shape=[1, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
  380. [[4., 5., 6., 7.]])
  381. >>> y5 = paddle.median(x, mode='min')
  382. >>> print(y5)
  383. Tensor(shape=[], dtype=int64, place=Place(cpu), stop_gradient=True,
  384. 5)
  385. >>> median_value, median_indices = paddle.median(x, axis=1, mode='min')
  386. >>> print(median_value)
  387. Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
  388. [1, 5, 9])
  389. >>> print(median_indices)
  390. Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
  391. [1, 1, 1])
  392. >>> # cases containing nan values
  393. >>> x = paddle.to_tensor(np.array([[1,float('nan'),3,float('nan')],[1,2,3,4],[float('nan'),1,2,3]]))
  394. >>> y6 = paddle.median(x, axis=-1, keepdim=True)
  395. >>> print(y6)
  396. Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
  397. [[nan ],
  398. [2.50000000],
  399. [nan ]])
  400. >>> median_value, median_indices = paddle.median(x, axis=1, keepdim=True, mode='min')
  401. >>> print(median_value)
  402. Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
  403. [[nan],
  404. [2. ],
  405. [nan]])
  406. >>> print(median_indices)
  407. Tensor(shape=[3, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
  408. [[1],
  409. [1],
  410. [0]])
  411. """
  412. if not isinstance(x, (Variable, paddle.pir.Value)):
  413. raise TypeError("In median, the input x should be a Tensor.")
  414. if in_dynamic_mode() and x.size == 0:
  415. # TODO: Currently, `__eq__` don't support arguments (`pir.Value` & `int`)
  416. raise ValueError("In median, the size of input x should not be 0.")
  417. is_flatten = False
  418. dims = len(x.shape)
  419. if dims == 0:
  420. assert axis in [
  421. -1,
  422. 0,
  423. None,
  424. ], 'when input 0-D, axis can only be [-1, 0] or default None'
  425. is_flatten = True
  426. if mode not in ('avg', 'min'):
  427. raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")
  428. need_idx = axis is not None
  429. if axis is None:
  430. is_flatten = True
  431. if is_flatten:
  432. x = paddle.flatten(x)
  433. axis = 0
  434. else:
  435. if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
  436. raise ValueError(
  437. "In median, axis should be none or an integer in range [-rank(x), rank(x))."
  438. )
  439. if axis < 0:
  440. axis += dims
  441. sz = x.shape[axis]
  442. kth = sz >> 1
  443. tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
  444. if mode == 'avg':
  445. dtype = (
  446. 'float64'
  447. if x.dtype
  448. in [core.VarDesc.VarType.FP64, paddle.base.core.DataType.FLOAT64]
  449. else 'float32'
  450. )
  451. if sz & 1 == 0:
  452. out_tensor = paddle.slice(
  453. tensor_topk, axes=[axis], starts=[kth - 1], ends=[kth]
  454. ) + paddle.slice(
  455. tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
  456. )
  457. out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
  458. else:
  459. out_tensor = paddle.cast(
  460. paddle.slice(
  461. tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
  462. ),
  463. dtype=dtype,
  464. )
  465. out_tensor = out_tensor + paddle.sum(
  466. paddle.cast(paddle.isnan(x), dtype=dtype) * x.astype(dtype),
  467. axis=axis,
  468. keepdim=True,
  469. )
  470. else: # mode == 'min'
  471. if sz & 1 == 0:
  472. out_tensor = paddle.slice(
  473. tensor_topk, axes=[axis], starts=[kth - 1], ends=[kth]
  474. )
  475. if need_idx:
  476. out_idx = paddle.slice(
  477. idx, axes=[axis], starts=[kth - 1], ends=[kth]
  478. )
  479. else:
  480. out_tensor = paddle.slice(
  481. tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
  482. )
  483. if need_idx:
  484. out_idx = paddle.slice(
  485. idx, axes=[axis], starts=[kth], ends=[kth + 1]
  486. )
  487. # if contain nan on axis, return nan for that axis
  488. out_tensor = out_tensor + paddle.sum(
  489. paddle.cast(paddle.isnan(x), dtype=x.dtype) * x,
  490. axis=axis,
  491. keepdim=True,
  492. ).astype(x.dtype)
  493. if need_idx:
  494. # replace index using the first nan value's index on axis for out_idx
  495. # topk is not stable on cpu device, use argsort instead
  496. x_isnan = paddle.isnan(x).astype("int64")
  497. x_all_zero = paddle.zeros_like(x_isnan)
  498. index_along_axis = paddle.argsort(
  499. x_all_zero, axis=axis, stable=True
  500. )
  501. # find the index of the leading one in x_isnan
  502. cumsum = x_isnan.cumsum(axis=axis)
  503. x_isnan = x_isnan * paddle.where(cumsum > 1, 0, 1)
  504. nan_index = paddle.sum(
  505. index_along_axis * x_isnan, axis=axis, keepdim=True
  506. )
  507. nan_index_mask = paddle.sum(x_isnan, axis=axis, keepdim=True)
  508. out_idx = (
  509. out_idx * paddle.logical_not(nan_index_mask).astype('int64')
  510. + nan_index
  511. )
  512. if is_flatten:
  513. if keepdim:
  514. out_tensor = out_tensor.reshape([1] * dims)
  515. else:
  516. out_tensor = out_tensor.reshape([])
  517. else:
  518. if not keepdim:
  519. out_tensor = out_tensor.squeeze(axis)
  520. if mode == 'min' and need_idx:
  521. if not keepdim:
  522. out_idx = out_idx.squeeze(axis)
  523. return out_tensor, out_idx
  524. return out_tensor
  525. def _compute_quantile(
  526. x, q, axis=None, keepdim=False, interpolation="linear", ignore_nan=False
  527. ):
  528. """
  529. Compute the quantile of the input along the specified axis.
  530. Args:
  531. x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
  532. q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
  533. a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
  534. If q is a 0-D Tensor, it will be treated as an integer or float.
  535. axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
  536. ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  537. If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
  538. If ``axis`` is a list, quantile is calculated over all elements of given axises.
  539. If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
  540. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  541. in the output Tensor. If ``keepdim`` is True, the dimensions of
  542. the output Tensor is the same as ``x`` except in the reduced
  543. dimensions(it is of size 1 in this case). Otherwise, the shape of
  544. the output Tensor is squeezed in ``axis`` . Default is False.
  545. interpolation (str, optional): The interpolation method to use
  546. when the desired quantile falls between two data points. Must be one of linear, higher,
  547. lower, midpoint and nearest. Default is linear.
  548. ignore_nan: (bool, optional): Whether to ignore NaN of input Tensor.
  549. If ``ignore_nan`` is True, it will calculate nanquantile.
  550. Otherwise it will calculate quantile. Default is False.
  551. Returns:
  552. Tensor, results of quantile along ``axis`` of ``x``.
  553. In order to obtain higher precision, data type of results will be float64.
  554. """
  555. # Validate x
  556. if not isinstance(x, (Variable, paddle.pir.Value)):
  557. raise TypeError("input x should be a Tensor.")
  558. # Validate q
  559. if isinstance(q, (int, float)):
  560. q = [q]
  561. elif isinstance(q, (list, tuple)):
  562. if len(q) <= 0:
  563. raise ValueError("q should not be empty")
  564. elif isinstance(q, Variable):
  565. if len(q.shape) > 1:
  566. raise ValueError("q should be a 0-D tensor or a 1-D tensor")
  567. if len(q.shape) == 0:
  568. q = [q]
  569. else:
  570. raise TypeError(
  571. "Type of q should be int, float, list or tuple, or tensor"
  572. )
  573. for q_num in q:
  574. # we do not validate tensor q in static mode
  575. if not in_dynamic_or_pir_mode() and isinstance(q_num, Variable):
  576. break
  577. if q_num < 0 or q_num > 1:
  578. raise ValueError("q should be in range [0, 1]")
  579. if interpolation not in [
  580. "linear",
  581. "lower",
  582. "higher",
  583. "nearest",
  584. "midpoint",
  585. ]:
  586. raise ValueError(
  587. f"interpolation must be one of 'linear', 'lower', 'higher', 'nearest' or 'midpoint', but got {interpolation}"
  588. )
  589. # Validate axis
  590. dims = len(x.shape)
  591. out_shape = list(x.shape)
  592. if axis is None:
  593. x = paddle.flatten(x)
  594. axis = 0
  595. out_shape = [1] * dims
  596. else:
  597. if isinstance(axis, list):
  598. axis_src, axis_dst = [], []
  599. for axis_single in axis:
  600. if not isinstance(axis_single, int) or not (
  601. axis_single < dims and axis_single >= -dims
  602. ):
  603. raise ValueError(
  604. "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
  605. )
  606. if axis_single < 0:
  607. axis_single = axis_single + dims
  608. axis_src.append(axis_single)
  609. out_shape[axis_single] = 1
  610. axis_dst = list(range(-len(axis), 0))
  611. x = paddle.moveaxis(x, axis_src, axis_dst)
  612. if len(axis_dst) == 0:
  613. x = paddle.flatten(x)
  614. axis = 0
  615. else:
  616. x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
  617. axis = axis_dst[0]
  618. else:
  619. if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
  620. raise ValueError(
  621. "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
  622. )
  623. if axis < 0:
  624. axis += dims
  625. out_shape[axis] = 1
  626. mask = x.isnan()
  627. valid_counts = mask.logical_not().sum(axis=axis, keepdim=True)
  628. indices = []
  629. for q_num in q:
  630. if in_dynamic_or_pir_mode():
  631. q_num = paddle.to_tensor(q_num, dtype=x.dtype)
  632. if ignore_nan:
  633. indices.append(q_num * (valid_counts - 1))
  634. else:
  635. index = q_num * (valid_counts - 1)
  636. last_index = x.shape[axis] - 1
  637. nums = paddle.full_like(index, fill_value=last_index)
  638. index = paddle.where(mask.any(axis=axis, keepdim=True), nums, index)
  639. indices.append(index)
  640. sorted_tensor = paddle.sort(x, axis)
  641. def _compute_index(index):
  642. if interpolation == "nearest":
  643. idx = paddle.round(index).astype(paddle.int32)
  644. return paddle.take_along_axis(sorted_tensor, idx, axis=axis)
  645. indices_below = paddle.floor(index).astype(paddle.int32)
  646. if interpolation != "higher":
  647. # avoid unnecessary compute
  648. tensor_below = paddle.take_along_axis(
  649. sorted_tensor, indices_below, axis=axis
  650. )
  651. if interpolation == "lower":
  652. return tensor_below
  653. indices_upper = paddle.ceil(index).astype(paddle.int32)
  654. tensor_upper = paddle.take_along_axis(
  655. sorted_tensor, indices_upper, axis=axis
  656. )
  657. if interpolation == "higher":
  658. return tensor_upper
  659. if interpolation == "midpoint":
  660. return (tensor_upper + tensor_below) / 2
  661. weights = (index - indices_below.astype(index.dtype)).astype(x.dtype)
  662. # "linear"
  663. return paddle.lerp(
  664. tensor_below.astype(x.dtype),
  665. tensor_upper.astype(x.dtype),
  666. weights,
  667. )
  668. outputs = []
  669. # TODO(chenjianye): replace the for-loop to directly take elements.
  670. for index in indices:
  671. out = _compute_index(index)
  672. if not keepdim:
  673. out = paddle.squeeze(out, axis=axis)
  674. else:
  675. out = out.reshape(out_shape)
  676. outputs.append(out)
  677. if len(outputs) > 1:
  678. outputs = paddle.stack(outputs, 0)
  679. else:
  680. outputs = outputs[0]
  681. # return outputs.astype(x.dtype)
  682. return outputs
  683. def quantile(x, q, axis=None, keepdim=False, interpolation="linear"):
  684. """
  685. Compute the quantile of the input along the specified axis.
  686. If any values in a reduced row are NaN, then the quantiles for that reduction will be NaN.
  687. Args:
  688. x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
  689. q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
  690. a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
  691. If q is a 0-D Tensor, it will be treated as an integer or float.
  692. axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
  693. ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  694. If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
  695. If ``axis`` is a list, quantile is calculated over all elements of given axises.
  696. If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
  697. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  698. in the output Tensor. If ``keepdim`` is True, the dimensions of
  699. the output Tensor is the same as ``x`` except in the reduced
  700. dimensions(it is of size 1 in this case). Otherwise, the shape of
  701. the output Tensor is squeezed in ``axis`` . Default is False.
  702. interpolation (str, optional): The interpolation method to use
  703. when the desired quantile falls between two data points. Must be one of linear, higher,
  704. lower, midpoint and nearest. Default is linear.
  705. name (str, optional): Name for the operation (optional, default is None).
  706. For more information, please refer to :ref:`api_guide_Name`.
  707. Returns:
  708. Tensor, results of quantile along ``axis`` of ``x``.
  709. Examples:
  710. .. code-block:: python
  711. >>> import paddle
  712. >>> y = paddle.arange(0, 8 ,dtype="float32").reshape([4, 2])
  713. >>> print(y)
  714. Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  715. [[0., 1.],
  716. [2., 3.],
  717. [4., 5.],
  718. [6., 7.]])
  719. >>> y1 = paddle.quantile(y, q=0.5, axis=[0, 1])
  720. >>> print(y1)
  721. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  722. 3.50000000)
  723. >>> y2 = paddle.quantile(y, q=0.5, axis=1)
  724. >>> print(y2)
  725. Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
  726. [0.50000000, 2.50000000, 4.50000000, 6.50000000])
  727. >>> y3 = paddle.quantile(y, q=[0.3, 0.5], axis=0)
  728. >>> print(y3)
  729. Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  730. [[1.80000000, 2.80000000],
  731. [3. , 4. ]])
  732. >>> y[0,0] = float("nan")
  733. >>> y4 = paddle.quantile(y, q=0.8, axis=1, keepdim=True)
  734. >>> print(y4)
  735. Tensor(shape=[4, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  736. [[nan ],
  737. [2.80000000],
  738. [4.80000000],
  739. [6.80000000]])
  740. """
  741. return _compute_quantile(
  742. x,
  743. q,
  744. axis=axis,
  745. keepdim=keepdim,
  746. interpolation=interpolation,
  747. ignore_nan=False,
  748. )
  749. def nanquantile(x, q, axis=None, keepdim=False, interpolation="linear"):
  750. """
  751. Compute the quantile of the input as if NaN values in input did not exist.
  752. If all values in a reduced row are NaN, then the quantiles for that reduction will be NaN.
  753. Args:
  754. x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
  755. q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
  756. a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
  757. If q is a 0-D Tensor, it will be treated as an integer or float.
  758. axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
  759. ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
  760. If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
  761. If ``axis`` is a list, quantile is calculated over all elements of given axises.
  762. If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
  763. keepdim (bool, optional): Whether to reserve the reduced dimension(s)
  764. in the output Tensor. If ``keepdim`` is True, the dimensions of
  765. the output Tensor is the same as ``x`` except in the reduced
  766. dimensions(it is of size 1 in this case). Otherwise, the shape of
  767. the output Tensor is squeezed in ``axis`` . Default is False.
  768. interpolation (str, optional): The interpolation method to use
  769. when the desired quantile falls between two data points. Must be one of linear, higher,
  770. lower, midpoint and nearest. Default is linear.
  771. name (str, optional): Name for the operation (optional, default is None).
  772. For more information, please refer to :ref:`api_guide_Name`.
  773. Returns:
  774. Tensor, results of quantile along ``axis`` of ``x``.
  775. Examples:
  776. .. code-block:: python
  777. >>> import paddle
  778. >>> x = paddle.to_tensor(
  779. ... [[0, 1, 2, 3, 4],
  780. ... [5, 6, 7, 8, 9]],
  781. ... dtype="float32")
  782. >>> x[0,0] = float("nan")
  783. >>> y1 = paddle.nanquantile(x, q=0.5, axis=[0, 1])
  784. >>> print(y1)
  785. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  786. 5.)
  787. >>> y2 = paddle.nanquantile(x, q=0.5, axis=1)
  788. >>> print(y2)
  789. Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
  790. [2.50000000, 7. ])
  791. >>> y3 = paddle.nanquantile(x, q=[0.3, 0.5], axis=0)
  792. >>> print(y3)
  793. Tensor(shape=[2, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
  794. [[5. , 2.50000000, 3.50000000, 4.50000000, 5.50000000],
  795. [5. , 3.50000000, 4.50000000, 5.50000000, 6.50000000]])
  796. >>> y4 = paddle.nanquantile(x, q=0.8, axis=1, keepdim=True)
  797. >>> print(y4)
  798. Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  799. [[3.40000000],
  800. [8.20000000]])
  801. >>> nan = paddle.full(shape=[2, 3], fill_value=float("nan"))
  802. >>> y5 = paddle.nanquantile(nan, q=0.8, axis=1, keepdim=True)
  803. >>> print(y5)
  804. Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  805. [[nan],
  806. [nan]])
  807. """
  808. return _compute_quantile(
  809. x,
  810. q,
  811. axis=axis,
  812. keepdim=keepdim,
  813. interpolation=interpolation,
  814. ignore_nan=True,
  815. )