| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945 |
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # TODO: define statistical functions of a tensor
- import paddle
- from paddle import _C_ops
- from paddle.framework import (
- in_dynamic_mode,
- in_dynamic_or_pir_mode,
- )
- from ..base.data_feeder import check_type, check_variable_and_dtype
- from ..common_ops_import import Variable
- from ..framework import LayerHelper, core
- from .math import _get_reduce_axis_with_tensor
- from .search import where
- __all__ = []
- def mean(x, axis=None, keepdim=False, name=None):
- """
- Computes the mean of the input tensor's elements along ``axis``.
- Args:
- x (Tensor): The input Tensor with data type float32, float64.
- axis (int|list|tuple, optional): The axis along which to perform mean
- calculations. ``axis`` should be int, list(int) or tuple(int). If
- ``axis`` is a list/tuple of dimension(s), mean is calculated along
- all element(s) of ``axis`` . ``axis`` or element(s) of ``axis``
- should be in range [-D, D), where D is the dimensions of ``x`` . If
- ``axis`` or element(s) of ``axis`` is less than 0, it works the
- same way as :math:`axis + D` . If ``axis`` is None, mean is
- calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor, results of average along ``axis`` of ``x``, with the same data
- type as ``x``.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.to_tensor([[[1., 2., 3., 4.],
- ... [5., 6., 7., 8.],
- ... [9., 10., 11., 12.]],
- ... [[13., 14., 15., 16.],
- ... [17., 18., 19., 20.],
- ... [21., 22., 23., 24.]]])
- >>> out1 = paddle.mean(x)
- >>> print(out1.numpy())
- 12.5
- >>> out2 = paddle.mean(x, axis=-1)
- >>> print(out2.numpy())
- [[ 2.5 6.5 10.5]
- [14.5 18.5 22.5]]
- >>> out3 = paddle.mean(x, axis=-1, keepdim=True)
- >>> print(out3.numpy())
- [[[ 2.5]
- [ 6.5]
- [10.5]]
- [[14.5]
- [18.5]
- [22.5]]]
- >>> out4 = paddle.mean(x, axis=[0, 2])
- >>> print(out4.numpy())
- [ 8.5 12.5 16.5]
- """
- if in_dynamic_or_pir_mode():
- return _C_ops.mean(x, axis, keepdim)
- else:
- reduce_all, axis = _get_reduce_axis_with_tensor(axis, x)
- check_variable_and_dtype(
- x,
- 'x/input',
- ['uint16', "int32", 'float16', 'float32', 'float64'],
- 'mean/reduce_mean',
- )
- check_type(
- axis, 'axis/dim', (int, list, tuple, Variable), 'mean/reduce_mean'
- )
- if isinstance(axis, (list, tuple)):
- for item in axis:
- check_type(
- item,
- 'elements of axis/dim',
- (int, Variable),
- 'mean/reduce_mean',
- )
- helper = LayerHelper('mean', **locals())
- attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}
- out = helper.create_variable_for_type_inference(x.dtype)
- helper.append_op(
- type='reduce_mean',
- inputs={'X': x},
- outputs={'Out': out},
- attrs=attrs,
- )
- return out
- def var(x, axis=None, unbiased=True, keepdim=False, name=None):
- """
- Computes the variance of ``x`` along ``axis`` .
- Args:
- x (Tensor): The input Tensor with data type float16, float32, float64.
- axis (int|list|tuple, optional): The axis along which to perform variance calculations. ``axis`` should be int, list(int) or tuple(int).
- - If ``axis`` is a list/tuple of dimension(s), variance is calculated along all element(s) of ``axis`` . ``axis`` or element(s) of ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- - If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` .
- - If ``axis`` is None, variance is calculated over all elements of ``x``. Default is None.
- unbiased (bool, optional): Whether to use the unbiased estimation. If ``unbiased`` is True, the divisor used in the computation is :math:`N - 1`, where :math:`N` represents the number of elements along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
- keep_dim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the input unless keep_dim is true. Default is False.
- name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor, results of variance along ``axis`` of ``x``, with the same data type as ``x``.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
- >>> out1 = paddle.var(x)
- >>> print(out1.numpy())
- 2.6666667
- >>> out2 = paddle.var(x, axis=1)
- >>> print(out2.numpy())
- [1. 4.3333335]
- """
- if not in_dynamic_mode():
- check_variable_and_dtype(
- x, 'x', ['float16', 'float32', 'float64'], 'var'
- )
- u = mean(x, axis, True, name)
- out = paddle.sum(paddle.pow((x - u), 2), axis, keepdim=keepdim, name=name)
- dtype = x.dtype
- n = paddle.cast(paddle.numel(x), "int64") / paddle.cast(
- paddle.numel(out), "int64"
- )
- n = n.astype(dtype)
- if unbiased:
- one_const = paddle.ones([], x.dtype)
- n = where(n > one_const, n - 1.0, one_const)
- n.stop_gradient = True
- out /= n
- return out
- def std(x, axis=None, unbiased=True, keepdim=False, name=None):
- """
- Computes the standard-deviation of ``x`` along ``axis`` .
- Args:
- x (Tensor): The input Tensor with data type float16, float32, float64.
- axis (int|list|tuple, optional): The axis along which to perform
- standard-deviation calculations. ``axis`` should be int, list(int)
- or tuple(int). If ``axis`` is a list/tuple of dimension(s),
- standard-deviation is calculated along all element(s) of ``axis`` .
- ``axis`` or element(s) of ``axis`` should be in range [-D, D),
- where D is the dimensions of ``x`` . If ``axis`` or element(s) of
- ``axis`` is less than 0, it works the same way as :math:`axis + D` .
- If ``axis`` is None, standard-deviation is calculated over all
- elements of ``x``. Default is None.
- unbiased (bool, optional): Whether to use the unbiased estimation. If
- ``unbiased`` is True, the standard-deviation is calculated via the
- unbiased estimator. If ``unbiased`` is True, the divisor used in
- the computation is :math:`N - 1`, where :math:`N` represents the
- number of elements along ``axis`` , otherwise the divisor is
- :math:`N`. Default is True.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor, results of standard-deviation along ``axis`` of ``x``, with the
- same data type as ``x``.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
- >>> out1 = paddle.std(x)
- >>> print(out1.numpy())
- 1.6329932
- >>> out2 = paddle.std(x, unbiased=False)
- >>> print(out2.numpy())
- 1.490712
- >>> out3 = paddle.std(x, axis=1)
- >>> print(out3.numpy())
- [1. 2.081666]
- """
- if not in_dynamic_or_pir_mode():
- check_variable_and_dtype(
- x, 'x', ['float16', 'float32', 'float64'], 'std'
- )
- out = var(**locals())
- return paddle.sqrt(out)
- def numel(x, name=None):
- """
- Returns the number of elements for a tensor, which is a 0-D int64 Tensor with shape [].
- Args:
- x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64, complex64, complex128.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor: The number of elements for the input Tensor, whose shape is [].
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.full(shape=[4, 5, 7], fill_value=0, dtype='int32')
- >>> numel = paddle.numel(x)
- >>> print(numel.numpy())
- 140
- """
- if in_dynamic_or_pir_mode():
- return _C_ops.numel(x)
- else:
- if not isinstance(x, Variable):
- raise TypeError("x must be a Tensor in numel")
- helper = LayerHelper('numel', **locals())
- out = helper.create_variable_for_type_inference(
- dtype=core.VarDesc.VarType.INT64
- )
- helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out})
- return out
- def nanmedian(x, axis=None, keepdim=False, mode='avg', name=None):
- r"""
- Compute the median along the specified axis, while ignoring NaNs.
- If the valid count of elements is a even number,
- the average value of both elements in the middle is calculated as the median.
- Args:
- x (Tensor): The input Tensor, it's data type can be int32, int64, float16, bfloat16, float32, float64.
- axis (None|int|list|tuple, optional):
- The axis along which to perform median calculations ``axis`` should be int or list of int.
- ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
- If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- mode (str, optional): Whether to use mean or min operation to calculate
- the nanmedian values when the input tensor has an even number of non-NaN elements
- along the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor or tuple of Tensor. If ``mode`` == 'min' and ``axis`` is int, the result
- will be a tuple of two tensors (nanmedian value and nanmedian index). Otherwise,
- only nanmedian value will be returned.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.to_tensor([[float('nan'), 2. , 3. ], [0. , 1. , 2. ]])
- >>> y1 = x.nanmedian()
- >>> print(y1.numpy())
- 2.0
- >>> y2 = x.nanmedian(0)
- >>> print(y2.numpy())
- [0. 1.5 2.5]
- >>> y3 = x.nanmedian(0, keepdim=True)
- >>> print(y3.numpy())
- [[0. 1.5 2.5]]
- >>> y4 = x.nanmedian((0, 1))
- >>> print(y4.numpy())
- 2.0
- >>> y5 = x.nanmedian(mode='min')
- >>> print(y5.numpy())
- 2.0
- >>> y6, y6_index = x.nanmedian(0, mode='min')
- >>> print(y6.numpy())
- [0. 1. 2.]
- >>> print(y6_index.numpy())
- [1 1 1]
- >>> y7, y7_index = x.nanmedian(1, mode='min')
- >>> print(y7.numpy())
- [2. 1.]
- >>> print(y7_index.numpy())
- [1 1]
- >>> y8 = x.nanmedian((0,1), mode='min')
- >>> print(y8.numpy())
- 2.0
- """
- if not isinstance(x, (Variable, paddle.pir.Value)):
- raise TypeError("In median, the input x should be a Tensor.")
- if isinstance(axis, (list, tuple)) and len(axis) == 0:
- raise ValueError("Axis list should not be empty.")
- if mode not in ('avg', 'min'):
- raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")
- need_index = (axis is not None) and (not isinstance(axis, (list, tuple)))
- if axis is None:
- axis = []
- elif isinstance(axis, tuple):
- axis = list(axis)
- elif isinstance(axis, int):
- axis = [axis]
- if in_dynamic_or_pir_mode():
- out, indices = _C_ops.nanmedian(x, axis, keepdim, mode)
- indices.stop_gradient = True
- else:
- check_variable_and_dtype(
- x,
- 'X',
- ['int32', 'int64', 'float16', 'float32', 'float64', 'uint16'],
- 'nanmedian',
- )
- helper = LayerHelper('nanmedian', **locals())
- attrs = {'axis': axis, 'keepdim': keepdim, 'mode': mode}
- out = helper.create_variable_for_type_inference(x.dtype)
- indices = helper.create_variable_for_type_inference(paddle.int64)
- helper.append_op(
- type='nanmedian',
- inputs={'X': x},
- outputs={'Out': out, 'MedianIndex': indices},
- attrs=attrs,
- )
- indices.stop_gradient = True
- if mode == 'min' and need_index:
- return out, indices
- else:
- return out
- def median(x, axis=None, keepdim=False, mode='avg', name=None):
- """
- Compute the median along the specified axis.
- Args:
- x (Tensor): The input Tensor, it's data type can be float16, float32, float64, int32, int64.
- axis (int, optional): The axis along which to perform median calculations ``axis`` should be int.
- ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
- If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- mode (str, optional): Whether to use mean or min operation to calculate
- the median values when the input tensor has an even number of elements
- in the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor or tuple of Tensor.
- If ``mode`` == 'avg', the result will be the tensor of median values;
- If ``mode`` == 'min' and ``axis`` is None, the result will be the tensor of median values;
- If ``mode`` == 'min' and ``axis`` is not None, the result will be a tuple of two tensors
- containing median values and their indices.
- When ``mode`` == 'avg', if data type of ``x`` is float64, data type of median values will be float64,
- otherwise data type of median values will be float32.
- When ``mode`` == 'min', the data type of median values will be the same as ``x``. The data type of
- indices will be int64.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> import numpy as np
- >>> x = paddle.arange(12).reshape([3, 4])
- >>> print(x)
- Tensor(shape=[3, 4], dtype=int64, place=Place(cpu), stop_gradient=True,
- [[0 , 1 , 2 , 3 ],
- [4 , 5 , 6 , 7 ],
- [8 , 9 , 10, 11]])
- >>> y1 = paddle.median(x)
- >>> print(y1)
- Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
- 5.50000000)
- >>> y2 = paddle.median(x, axis=0)
- >>> print(y2)
- Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
- [4., 5., 6., 7.])
- >>> y3 = paddle.median(x, axis=1)
- >>> print(y3)
- Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
- [1.50000000, 5.50000000, 9.50000000])
- >>> y4 = paddle.median(x, axis=0, keepdim=True)
- >>> print(y4)
- Tensor(shape=[1, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[4., 5., 6., 7.]])
- >>> y5 = paddle.median(x, mode='min')
- >>> print(y5)
- Tensor(shape=[], dtype=int64, place=Place(cpu), stop_gradient=True,
- 5)
- >>> median_value, median_indices = paddle.median(x, axis=1, mode='min')
- >>> print(median_value)
- Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
- [1, 5, 9])
- >>> print(median_indices)
- Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
- [1, 1, 1])
- >>> # cases containing nan values
- >>> x = paddle.to_tensor(np.array([[1,float('nan'),3,float('nan')],[1,2,3,4],[float('nan'),1,2,3]]))
- >>> y6 = paddle.median(x, axis=-1, keepdim=True)
- >>> print(y6)
- Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
- [[nan ],
- [2.50000000],
- [nan ]])
- >>> median_value, median_indices = paddle.median(x, axis=1, keepdim=True, mode='min')
- >>> print(median_value)
- Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
- [[nan],
- [2. ],
- [nan]])
- >>> print(median_indices)
- Tensor(shape=[3, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
- [[1],
- [1],
- [0]])
- """
- if not isinstance(x, (Variable, paddle.pir.Value)):
- raise TypeError("In median, the input x should be a Tensor.")
- if in_dynamic_mode() and x.size == 0:
- # TODO: Currently, `__eq__` don't support arguments (`pir.Value` & `int`)
- raise ValueError("In median, the size of input x should not be 0.")
- is_flatten = False
- dims = len(x.shape)
- if dims == 0:
- assert axis in [
- -1,
- 0,
- None,
- ], 'when input 0-D, axis can only be [-1, 0] or default None'
- is_flatten = True
- if mode not in ('avg', 'min'):
- raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")
- need_idx = axis is not None
- if axis is None:
- is_flatten = True
- if is_flatten:
- x = paddle.flatten(x)
- axis = 0
- else:
- if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
- raise ValueError(
- "In median, axis should be none or an integer in range [-rank(x), rank(x))."
- )
- if axis < 0:
- axis += dims
- sz = x.shape[axis]
- kth = sz >> 1
- tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
- if mode == 'avg':
- dtype = (
- 'float64'
- if x.dtype
- in [core.VarDesc.VarType.FP64, paddle.base.core.DataType.FLOAT64]
- else 'float32'
- )
- if sz & 1 == 0:
- out_tensor = paddle.slice(
- tensor_topk, axes=[axis], starts=[kth - 1], ends=[kth]
- ) + paddle.slice(
- tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
- )
- out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
- else:
- out_tensor = paddle.cast(
- paddle.slice(
- tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
- ),
- dtype=dtype,
- )
- out_tensor = out_tensor + paddle.sum(
- paddle.cast(paddle.isnan(x), dtype=dtype) * x.astype(dtype),
- axis=axis,
- keepdim=True,
- )
- else: # mode == 'min'
- if sz & 1 == 0:
- out_tensor = paddle.slice(
- tensor_topk, axes=[axis], starts=[kth - 1], ends=[kth]
- )
- if need_idx:
- out_idx = paddle.slice(
- idx, axes=[axis], starts=[kth - 1], ends=[kth]
- )
- else:
- out_tensor = paddle.slice(
- tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]
- )
- if need_idx:
- out_idx = paddle.slice(
- idx, axes=[axis], starts=[kth], ends=[kth + 1]
- )
- # if contain nan on axis, return nan for that axis
- out_tensor = out_tensor + paddle.sum(
- paddle.cast(paddle.isnan(x), dtype=x.dtype) * x,
- axis=axis,
- keepdim=True,
- ).astype(x.dtype)
- if need_idx:
- # replace index using the first nan value's index on axis for out_idx
- # topk is not stable on cpu device, use argsort instead
- x_isnan = paddle.isnan(x).astype("int64")
- x_all_zero = paddle.zeros_like(x_isnan)
- index_along_axis = paddle.argsort(
- x_all_zero, axis=axis, stable=True
- )
- # find the index of the leading one in x_isnan
- cumsum = x_isnan.cumsum(axis=axis)
- x_isnan = x_isnan * paddle.where(cumsum > 1, 0, 1)
- nan_index = paddle.sum(
- index_along_axis * x_isnan, axis=axis, keepdim=True
- )
- nan_index_mask = paddle.sum(x_isnan, axis=axis, keepdim=True)
- out_idx = (
- out_idx * paddle.logical_not(nan_index_mask).astype('int64')
- + nan_index
- )
- if is_flatten:
- if keepdim:
- out_tensor = out_tensor.reshape([1] * dims)
- else:
- out_tensor = out_tensor.reshape([])
- else:
- if not keepdim:
- out_tensor = out_tensor.squeeze(axis)
- if mode == 'min' and need_idx:
- if not keepdim:
- out_idx = out_idx.squeeze(axis)
- return out_tensor, out_idx
- return out_tensor
- def _compute_quantile(
- x, q, axis=None, keepdim=False, interpolation="linear", ignore_nan=False
- ):
- """
- Compute the quantile of the input along the specified axis.
- Args:
- x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
- q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
- a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
- If q is a 0-D Tensor, it will be treated as an integer or float.
- axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
- ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
- If ``axis`` is a list, quantile is calculated over all elements of given axises.
- If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- interpolation (str, optional): The interpolation method to use
- when the desired quantile falls between two data points. Must be one of linear, higher,
- lower, midpoint and nearest. Default is linear.
- ignore_nan: (bool, optional): Whether to ignore NaN of input Tensor.
- If ``ignore_nan`` is True, it will calculate nanquantile.
- Otherwise it will calculate quantile. Default is False.
- Returns:
- Tensor, results of quantile along ``axis`` of ``x``.
- In order to obtain higher precision, data type of results will be float64.
- """
- # Validate x
- if not isinstance(x, (Variable, paddle.pir.Value)):
- raise TypeError("input x should be a Tensor.")
- # Validate q
- if isinstance(q, (int, float)):
- q = [q]
- elif isinstance(q, (list, tuple)):
- if len(q) <= 0:
- raise ValueError("q should not be empty")
- elif isinstance(q, Variable):
- if len(q.shape) > 1:
- raise ValueError("q should be a 0-D tensor or a 1-D tensor")
- if len(q.shape) == 0:
- q = [q]
- else:
- raise TypeError(
- "Type of q should be int, float, list or tuple, or tensor"
- )
- for q_num in q:
- # we do not validate tensor q in static mode
- if not in_dynamic_or_pir_mode() and isinstance(q_num, Variable):
- break
- if q_num < 0 or q_num > 1:
- raise ValueError("q should be in range [0, 1]")
- if interpolation not in [
- "linear",
- "lower",
- "higher",
- "nearest",
- "midpoint",
- ]:
- raise ValueError(
- f"interpolation must be one of 'linear', 'lower', 'higher', 'nearest' or 'midpoint', but got {interpolation}"
- )
- # Validate axis
- dims = len(x.shape)
- out_shape = list(x.shape)
- if axis is None:
- x = paddle.flatten(x)
- axis = 0
- out_shape = [1] * dims
- else:
- if isinstance(axis, list):
- axis_src, axis_dst = [], []
- for axis_single in axis:
- if not isinstance(axis_single, int) or not (
- axis_single < dims and axis_single >= -dims
- ):
- raise ValueError(
- "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
- )
- if axis_single < 0:
- axis_single = axis_single + dims
- axis_src.append(axis_single)
- out_shape[axis_single] = 1
- axis_dst = list(range(-len(axis), 0))
- x = paddle.moveaxis(x, axis_src, axis_dst)
- if len(axis_dst) == 0:
- x = paddle.flatten(x)
- axis = 0
- else:
- x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
- axis = axis_dst[0]
- else:
- if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
- raise ValueError(
- "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
- )
- if axis < 0:
- axis += dims
- out_shape[axis] = 1
- mask = x.isnan()
- valid_counts = mask.logical_not().sum(axis=axis, keepdim=True)
- indices = []
- for q_num in q:
- if in_dynamic_or_pir_mode():
- q_num = paddle.to_tensor(q_num, dtype=x.dtype)
- if ignore_nan:
- indices.append(q_num * (valid_counts - 1))
- else:
- index = q_num * (valid_counts - 1)
- last_index = x.shape[axis] - 1
- nums = paddle.full_like(index, fill_value=last_index)
- index = paddle.where(mask.any(axis=axis, keepdim=True), nums, index)
- indices.append(index)
- sorted_tensor = paddle.sort(x, axis)
- def _compute_index(index):
- if interpolation == "nearest":
- idx = paddle.round(index).astype(paddle.int32)
- return paddle.take_along_axis(sorted_tensor, idx, axis=axis)
- indices_below = paddle.floor(index).astype(paddle.int32)
- if interpolation != "higher":
- # avoid unnecessary compute
- tensor_below = paddle.take_along_axis(
- sorted_tensor, indices_below, axis=axis
- )
- if interpolation == "lower":
- return tensor_below
- indices_upper = paddle.ceil(index).astype(paddle.int32)
- tensor_upper = paddle.take_along_axis(
- sorted_tensor, indices_upper, axis=axis
- )
- if interpolation == "higher":
- return tensor_upper
- if interpolation == "midpoint":
- return (tensor_upper + tensor_below) / 2
- weights = (index - indices_below.astype(index.dtype)).astype(x.dtype)
- # "linear"
- return paddle.lerp(
- tensor_below.astype(x.dtype),
- tensor_upper.astype(x.dtype),
- weights,
- )
- outputs = []
- # TODO(chenjianye): replace the for-loop to directly take elements.
- for index in indices:
- out = _compute_index(index)
- if not keepdim:
- out = paddle.squeeze(out, axis=axis)
- else:
- out = out.reshape(out_shape)
- outputs.append(out)
- if len(outputs) > 1:
- outputs = paddle.stack(outputs, 0)
- else:
- outputs = outputs[0]
- # return outputs.astype(x.dtype)
- return outputs
- def quantile(x, q, axis=None, keepdim=False, interpolation="linear"):
- """
- Compute the quantile of the input along the specified axis.
- If any values in a reduced row are NaN, then the quantiles for that reduction will be NaN.
- Args:
- x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
- q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
- a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
- If q is a 0-D Tensor, it will be treated as an integer or float.
- axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
- ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
- If ``axis`` is a list, quantile is calculated over all elements of given axises.
- If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- interpolation (str, optional): The interpolation method to use
- when the desired quantile falls between two data points. Must be one of linear, higher,
- lower, midpoint and nearest. Default is linear.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor, results of quantile along ``axis`` of ``x``.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> y = paddle.arange(0, 8 ,dtype="float32").reshape([4, 2])
- >>> print(y)
- Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[0., 1.],
- [2., 3.],
- [4., 5.],
- [6., 7.]])
- >>> y1 = paddle.quantile(y, q=0.5, axis=[0, 1])
- >>> print(y1)
- Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
- 3.50000000)
- >>> y2 = paddle.quantile(y, q=0.5, axis=1)
- >>> print(y2)
- Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
- [0.50000000, 2.50000000, 4.50000000, 6.50000000])
- >>> y3 = paddle.quantile(y, q=[0.3, 0.5], axis=0)
- >>> print(y3)
- Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[1.80000000, 2.80000000],
- [3. , 4. ]])
- >>> y[0,0] = float("nan")
- >>> y4 = paddle.quantile(y, q=0.8, axis=1, keepdim=True)
- >>> print(y4)
- Tensor(shape=[4, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[nan ],
- [2.80000000],
- [4.80000000],
- [6.80000000]])
- """
- return _compute_quantile(
- x,
- q,
- axis=axis,
- keepdim=keepdim,
- interpolation=interpolation,
- ignore_nan=False,
- )
- def nanquantile(x, q, axis=None, keepdim=False, interpolation="linear"):
- """
- Compute the quantile of the input as if NaN values in input did not exist.
- If all values in a reduced row are NaN, then the quantiles for that reduction will be NaN.
- Args:
- x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
- q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
- a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
- If q is a 0-D Tensor, it will be treated as an integer or float.
- axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
- ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
- If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
- If ``axis`` is a list, quantile is calculated over all elements of given axises.
- If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
- keepdim (bool, optional): Whether to reserve the reduced dimension(s)
- in the output Tensor. If ``keepdim`` is True, the dimensions of
- the output Tensor is the same as ``x`` except in the reduced
- dimensions(it is of size 1 in this case). Otherwise, the shape of
- the output Tensor is squeezed in ``axis`` . Default is False.
- interpolation (str, optional): The interpolation method to use
- when the desired quantile falls between two data points. Must be one of linear, higher,
- lower, midpoint and nearest. Default is linear.
- name (str, optional): Name for the operation (optional, default is None).
- For more information, please refer to :ref:`api_guide_Name`.
- Returns:
- Tensor, results of quantile along ``axis`` of ``x``.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> x = paddle.to_tensor(
- ... [[0, 1, 2, 3, 4],
- ... [5, 6, 7, 8, 9]],
- ... dtype="float32")
- >>> x[0,0] = float("nan")
- >>> y1 = paddle.nanquantile(x, q=0.5, axis=[0, 1])
- >>> print(y1)
- Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
- 5.)
- >>> y2 = paddle.nanquantile(x, q=0.5, axis=1)
- >>> print(y2)
- Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
- [2.50000000, 7. ])
- >>> y3 = paddle.nanquantile(x, q=[0.3, 0.5], axis=0)
- >>> print(y3)
- Tensor(shape=[2, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[5. , 2.50000000, 3.50000000, 4.50000000, 5.50000000],
- [5. , 3.50000000, 4.50000000, 5.50000000, 6.50000000]])
- >>> y4 = paddle.nanquantile(x, q=0.8, axis=1, keepdim=True)
- >>> print(y4)
- Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[3.40000000],
- [8.20000000]])
- >>> nan = paddle.full(shape=[2, 3], fill_value=float("nan"))
- >>> y5 = paddle.nanquantile(nan, q=0.8, axis=1, keepdim=True)
- >>> print(y5)
- Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[nan],
- [nan]])
- """
- return _compute_quantile(
- x,
- q,
- axis=axis,
- keepdim=keepdim,
- interpolation=interpolation,
- ignore_nan=True,
- )
|