arraylike.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. """
  2. Methods that can be shared by many array-like classes or subclasses:
  3. Series
  4. Index
  5. ExtensionArray
  6. """
  7. from __future__ import annotations
  8. import operator
  9. from typing import Any
  10. import numpy as np
  11. from pandas._libs import lib
  12. from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
  13. from pandas.core.dtypes.generic import ABCNDFrame
  14. from pandas.core import roperator
  15. from pandas.core.construction import extract_array
  16. from pandas.core.ops.common import unpack_zerodim_and_defer
# Maps the ``__name__`` of a binary numpy ufunc to the name of the reduction
# method that ``ufunc.reduce`` is dispatched to in ``dispatch_reduction_ufunc``
# (e.g. ``np.add.reduce(obj)`` -> ``obj.sum(...)``).
REDUCTION_ALIASES: dict[str, str] = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}
  23. class OpsMixin:
  24. # -------------------------------------------------------------
  25. # Comparisons
  26. def _cmp_method(self, other, op):
  27. return NotImplemented
  28. @unpack_zerodim_and_defer("__eq__")
  29. def __eq__(self, other):
  30. return self._cmp_method(other, operator.eq)
  31. @unpack_zerodim_and_defer("__ne__")
  32. def __ne__(self, other):
  33. return self._cmp_method(other, operator.ne)
  34. @unpack_zerodim_and_defer("__lt__")
  35. def __lt__(self, other):
  36. return self._cmp_method(other, operator.lt)
  37. @unpack_zerodim_and_defer("__le__")
  38. def __le__(self, other):
  39. return self._cmp_method(other, operator.le)
  40. @unpack_zerodim_and_defer("__gt__")
  41. def __gt__(self, other):
  42. return self._cmp_method(other, operator.gt)
  43. @unpack_zerodim_and_defer("__ge__")
  44. def __ge__(self, other):
  45. return self._cmp_method(other, operator.ge)
  46. # -------------------------------------------------------------
  47. # Logical Methods
  48. def _logical_method(self, other, op):
  49. return NotImplemented
  50. @unpack_zerodim_and_defer("__and__")
  51. def __and__(self, other):
  52. return self._logical_method(other, operator.and_)
  53. @unpack_zerodim_and_defer("__rand__")
  54. def __rand__(self, other):
  55. return self._logical_method(other, roperator.rand_)
  56. @unpack_zerodim_and_defer("__or__")
  57. def __or__(self, other):
  58. return self._logical_method(other, operator.or_)
  59. @unpack_zerodim_and_defer("__ror__")
  60. def __ror__(self, other):
  61. return self._logical_method(other, roperator.ror_)
  62. @unpack_zerodim_and_defer("__xor__")
  63. def __xor__(self, other):
  64. return self._logical_method(other, operator.xor)
  65. @unpack_zerodim_and_defer("__rxor__")
  66. def __rxor__(self, other):
  67. return self._logical_method(other, roperator.rxor)
  68. # -------------------------------------------------------------
  69. # Arithmetic Methods
  70. def _arith_method(self, other, op):
  71. return NotImplemented
  72. @unpack_zerodim_and_defer("__add__")
  73. def __add__(self, other):
  74. """
  75. Get Addition of DataFrame and other, column-wise.
  76. Equivalent to ``DataFrame.add(other)``.
  77. Parameters
  78. ----------
  79. other : scalar, sequence, Series, dict or DataFrame
  80. Object to be added to the DataFrame.
  81. Returns
  82. -------
  83. DataFrame
  84. The result of adding ``other`` to DataFrame.
  85. See Also
  86. --------
  87. DataFrame.add : Add a DataFrame and another object, with option for index-
  88. or column-oriented addition.
  89. Examples
  90. --------
  91. >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
  92. ... index=['elk', 'moose'])
  93. >>> df
  94. height weight
  95. elk 1.5 500
  96. moose 2.6 800
  97. Adding a scalar affects all rows and columns.
  98. >>> df[['height', 'weight']] + 1.5
  99. height weight
  100. elk 3.0 501.5
  101. moose 4.1 801.5
  102. Each element of a list is added to a column of the DataFrame, in order.
  103. >>> df[['height', 'weight']] + [0.5, 1.5]
  104. height weight
  105. elk 2.0 501.5
  106. moose 3.1 801.5
  107. Keys of a dictionary are aligned to the DataFrame, based on column names;
  108. each value in the dictionary is added to the corresponding column.
  109. >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
  110. height weight
  111. elk 2.0 501.5
  112. moose 3.1 801.5
  113. When `other` is a :class:`Series`, the index of `other` is aligned with the
  114. columns of the DataFrame.
  115. >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
  116. >>> df[['height', 'weight']] + s1
  117. height weight
  118. elk 3.0 500.5
  119. moose 4.1 800.5
  120. Even when the index of `other` is the same as the index of the DataFrame,
  121. the :class:`Series` will not be reoriented. If index-wise alignment is desired,
  122. :meth:`DataFrame.add` should be used with `axis='index'`.
  123. >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
  124. >>> df[['height', 'weight']] + s2
  125. elk height moose weight
  126. elk NaN NaN NaN NaN
  127. moose NaN NaN NaN NaN
  128. >>> df[['height', 'weight']].add(s2, axis='index')
  129. height weight
  130. elk 2.0 500.5
  131. moose 4.1 801.5
  132. When `other` is a :class:`DataFrame`, both columns names and the
  133. index are aligned.
  134. >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
  135. ... index=['elk', 'moose', 'deer'])
  136. >>> df[['height', 'weight']] + other
  137. height weight
  138. deer NaN NaN
  139. elk 1.7 NaN
  140. moose 3.0 NaN
  141. """
  142. return self._arith_method(other, operator.add)
  143. @unpack_zerodim_and_defer("__radd__")
  144. def __radd__(self, other):
  145. return self._arith_method(other, roperator.radd)
  146. @unpack_zerodim_and_defer("__sub__")
  147. def __sub__(self, other):
  148. return self._arith_method(other, operator.sub)
  149. @unpack_zerodim_and_defer("__rsub__")
  150. def __rsub__(self, other):
  151. return self._arith_method(other, roperator.rsub)
  152. @unpack_zerodim_and_defer("__mul__")
  153. def __mul__(self, other):
  154. return self._arith_method(other, operator.mul)
  155. @unpack_zerodim_and_defer("__rmul__")
  156. def __rmul__(self, other):
  157. return self._arith_method(other, roperator.rmul)
  158. @unpack_zerodim_and_defer("__truediv__")
  159. def __truediv__(self, other):
  160. return self._arith_method(other, operator.truediv)
  161. @unpack_zerodim_and_defer("__rtruediv__")
  162. def __rtruediv__(self, other):
  163. return self._arith_method(other, roperator.rtruediv)
  164. @unpack_zerodim_and_defer("__floordiv__")
  165. def __floordiv__(self, other):
  166. return self._arith_method(other, operator.floordiv)
  167. @unpack_zerodim_and_defer("__rfloordiv")
  168. def __rfloordiv__(self, other):
  169. return self._arith_method(other, roperator.rfloordiv)
  170. @unpack_zerodim_and_defer("__mod__")
  171. def __mod__(self, other):
  172. return self._arith_method(other, operator.mod)
  173. @unpack_zerodim_and_defer("__rmod__")
  174. def __rmod__(self, other):
  175. return self._arith_method(other, roperator.rmod)
  176. @unpack_zerodim_and_defer("__divmod__")
  177. def __divmod__(self, other):
  178. return self._arith_method(other, divmod)
  179. @unpack_zerodim_and_defer("__rdivmod__")
  180. def __rdivmod__(self, other):
  181. return self._arith_method(other, roperator.rdivmod)
  182. @unpack_zerodim_and_defer("__pow__")
  183. def __pow__(self, other):
  184. return self._arith_method(other, operator.pow)
  185. @unpack_zerodim_and_defer("__rpow__")
  186. def __rpow__(self, other):
  187. return self._arith_method(other, roperator.rpow)
  188. # -----------------------------------------------------------------------------
  189. # Helpers to implement __array_ufunc__
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    Shared implementation of ``__array_ufunc__``.  In order, it:

    1. folds ``out1``/``out2`` keywords into a single ``out`` tuple,
    2. tries to dispatch to a dunder operator,
    3. returns ``NotImplemented`` if another input should handle the call,
    4. aligns all NDFrame inputs on the union of their axes,
    5. applies the ufunc and re-wraps the result.

    Parameters
    ----------
    self : object
        The object on whose behalf the ufunc is applied.  The body reads
        ``axes``, ``ndim``, ``_AXIS_ORDERS``, ``_HANDLED_TYPES``,
        ``__array_priority__``, ``_constructor`` and
        ``_constructor_from_mgr`` from it.
    ufunc : np.ufunc
        The ufunc being invoked.
    method : str
        Name of the ufunc method being invoked, e.g. ``"__call__"``,
        ``"reduce"`` or ``"outer"``.
    *inputs : Any
        Positional inputs of the ufunc call.
    **kwargs : Any
        Keyword arguments of the ufunc call.

    Returns
    -------
    object
        ``NotImplemented`` when deferring to another input; otherwise the
        result of the ufunc.  Non-scalar results whose ``ndim`` equals
        ``self.ndim`` are re-wrapped through ``self._constructor`` or
        ``self._constructor_from_mgr``.

    Raises
    ------
    NotImplementedError
        If both DataFrame and Series are among the inputs being aligned, or
        if ``method == "outer"`` and the result's ``ndim`` differs from
        ``self.ndim``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    # Imported locally rather than at module level.
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import (
        ArrayManager,
        BlockManager,
    )

    cls = type(self)

    # Normalize ``out1=..., out2=...`` into ``out=(out1, out2)``.
    kwargs = _standardize_out_kwarg(**kwargs)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    # ``__array_ufunc__`` implementations that never cause deferral.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        # A foreign ``__array_ufunc__`` only triggers deferral when the item
        # is not one of the types ``self`` declares it can handle.
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        set_types = set(types)
        if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        # Reindex every NDFrame input onto the unioned axes; other inputs
        # pass through unchanged.
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        # Keep a name only when every input that has a ``name`` attribute
        # agrees on it; otherwise the result is unnamed.
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        # Re-wrap a ufunc result; multi-output ufuncs are re-wrapped per output.
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_reconstruct(x) for x in result)

        return _reconstruct(result)

    def _reconstruct(result):
        # Re-wrap a single output using the axes / name computed above.
        if lib.is_scalar(result):
            return result

        if result.ndim != self.ndim:
            # Dimensionality changed: the result is returned unwrapped,
            # except for "outer", which is rejected.
            if method == "outer":
                raise NotImplementedError
            return result
        if isinstance(result, (BlockManager, ArrayManager)):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor_from_mgr(result, axes=result.axes)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return reconstruct(result)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if result is not NotImplemented:
            return result
        # NotImplemented here falls through to the generic paths below.

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them for them.
        # * nout>1 is doable if BlockManager.apply took nout and
        #   returned a Tuple[BlockManager].
        # * len(inputs) > 1 is doable when we know that we have
        #   aligned blocks / dtypes.

        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: we can't use default_array_ufunc here bc reindexing means
        # that `self` may not be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        if method == "__call__" and not kwargs:
            # for np.<ufunc>(..) calls
            # kwargs cannot necessarily be handled block-by-block, so only
            # take this path if there are no kwargs
            mgr = inputs[0]._mgr
            result = mgr.apply(getattr(ufunc, method))
        else:
            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
            # Those can have an axis keyword and thus can't be called block-by-block
            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
            # e.g. np.negative (only one reached), with "where" and "out" in kwargs

    result = reconstruct(result)
    return result
  333. def _standardize_out_kwarg(**kwargs) -> dict:
  334. """
  335. If kwargs contain "out1" and "out2", replace that with a tuple "out"
  336. np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
  337. `out1=out1, out2=out2)`
  338. """
  339. if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
  340. out1 = kwargs.pop("out1")
  341. out2 = kwargs.pop("out2")
  342. out = (out1, out2)
  343. kwargs["out"] = out
  344. return kwargs
  345. def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
  346. """
  347. If we have an `out` keyword, then call the ufunc without `out` and then
  348. set the result into the given `out`.
  349. """
  350. # Note: we assume _standardize_out_kwarg has already been called.
  351. out = kwargs.pop("out")
  352. where = kwargs.pop("where", None)
  353. result = getattr(ufunc, method)(*inputs, **kwargs)
  354. if result is NotImplemented:
  355. return NotImplemented
  356. if isinstance(result, tuple):
  357. # i.e. np.divmod, np.modf, np.frexp
  358. if not isinstance(out, tuple) or len(out) != len(result):
  359. raise NotImplementedError
  360. for arr, res in zip(out, result):
  361. _assign_where(arr, res, where)
  362. return out
  363. if isinstance(out, tuple):
  364. if len(out) == 1:
  365. out = out[0]
  366. else:
  367. raise NotImplementedError
  368. _assign_where(out, result, where)
  369. return out
  370. def _assign_where(out, result, where) -> None:
  371. """
  372. Set a ufunc result into 'out', masking with a 'where' argument if necessary.
  373. """
  374. if where is None:
  375. # no 'where' arg passed to ufunc
  376. out[:] = result
  377. else:
  378. np.putmask(out, where, result)
  379. def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
  380. """
  381. Fallback to the behavior we would get if we did not define __array_ufunc__.
  382. Notes
  383. -----
  384. We are assuming that `self` is among `inputs`.
  385. """
  386. if not any(x is self for x in inputs):
  387. raise NotImplementedError
  388. new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
  389. return getattr(ufunc, method)(*new_inputs, **kwargs)
  390. def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
  391. """
  392. Dispatch ufunc reductions to self's reduction methods.
  393. """
  394. assert method == "reduce"
  395. if len(inputs) != 1 or inputs[0] is not self:
  396. return NotImplemented
  397. if ufunc.__name__ not in REDUCTION_ALIASES:
  398. return NotImplemented
  399. method_name = REDUCTION_ALIASES[ufunc.__name__]
  400. # NB: we are assuming that min/max represent minimum/maximum methods,
  401. # which would not be accurate for e.g. Timestamp.min
  402. if not hasattr(self, method_name):
  403. return NotImplemented
  404. if self.ndim > 1:
  405. if isinstance(self, ABCNDFrame):
  406. # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
  407. kwargs["numeric_only"] = False
  408. if "axis" not in kwargs:
  409. # For DataFrame reductions we don't want the default axis=0
  410. # Note: np.min is not a ufunc, but uses array_function_dispatch,
  411. # so calls DataFrame.min (without ever getting here) with the np.min
  412. # default of axis=None, which DataFrame.min catches and changes to axis=0.
  413. # np.minimum.reduce(df) gets here bc axis is not in kwargs,
  414. # so we set axis=0 to match the behaviorof np.minimum.reduce(df.values)
  415. kwargs["axis"] = 0
  416. # By default, numpy's reductions do not skip NaNs, so we have to
  417. # pass skipna=False
  418. return getattr(self, method_name)(skipna=False, **kwargs)