common.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. """
  2. Misc tools for implementing data structures
  3. Note: pandas.core.common is *not* part of the public API.
  4. """
  5. from __future__ import annotations
  6. import builtins
  7. from collections import (
  8. abc,
  9. defaultdict,
  10. )
  11. from collections.abc import (
  12. Collection,
  13. Generator,
  14. Hashable,
  15. Iterable,
  16. Sequence,
  17. )
  18. import contextlib
  19. from functools import partial
  20. import inspect
  21. from typing import (
  22. TYPE_CHECKING,
  23. Any,
  24. Callable,
  25. cast,
  26. overload,
  27. )
  28. import warnings
  29. import numpy as np
  30. from pandas._libs import lib
  31. from pandas.compat.numpy import np_version_gte1p24
  32. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  33. from pandas.core.dtypes.common import (
  34. is_bool_dtype,
  35. is_integer,
  36. )
  37. from pandas.core.dtypes.generic import (
  38. ABCExtensionArray,
  39. ABCIndex,
  40. ABCMultiIndex,
  41. ABCSeries,
  42. )
  43. from pandas.core.dtypes.inference import iterable_not_string
  44. if TYPE_CHECKING:
  45. from pandas._typing import (
  46. AnyArrayLike,
  47. ArrayLike,
  48. NpDtype,
  49. RandomState,
  50. T,
  51. )
  52. from pandas import Index
  53. def flatten(line):
  54. """
  55. Flatten an arbitrarily nested sequence.
  56. Parameters
  57. ----------
  58. line : sequence
  59. The non string sequence to flatten
  60. Notes
  61. -----
  62. This doesn't consider strings sequences.
  63. Returns
  64. -------
  65. flattened : generator
  66. """
  67. for element in line:
  68. if iterable_not_string(element):
  69. yield from flatten(element)
  70. else:
  71. yield element
  72. def consensus_name_attr(objs):
  73. name = objs[0].name
  74. for obj in objs[1:]:
  75. try:
  76. if obj.name != name:
  77. name = None
  78. except ValueError:
  79. name = None
  80. return name
  81. def is_bool_indexer(key: Any) -> bool:
  82. """
  83. Check whether `key` is a valid boolean indexer.
  84. Parameters
  85. ----------
  86. key : Any
  87. Only list-likes may be considered boolean indexers.
  88. All other types are not considered a boolean indexer.
  89. For array-like input, boolean ndarrays or ExtensionArrays
  90. with ``_is_boolean`` set are considered boolean indexers.
  91. Returns
  92. -------
  93. bool
  94. Whether `key` is a valid boolean indexer.
  95. Raises
  96. ------
  97. ValueError
  98. When the array is an object-dtype ndarray or ExtensionArray
  99. and contains missing values.
  100. See Also
  101. --------
  102. check_array_indexer : Check that `key` is a valid array to index,
  103. and convert to an ndarray.
  104. """
  105. if isinstance(
  106. key, (ABCSeries, np.ndarray, ABCIndex, ABCExtensionArray)
  107. ) and not isinstance(key, ABCMultiIndex):
  108. if key.dtype == np.object_:
  109. key_array = np.asarray(key)
  110. if not lib.is_bool_array(key_array):
  111. na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
  112. if lib.is_bool_array(key_array, skipna=True):
  113. # Don't raise on e.g. ["A", "B", np.nan], see
  114. # test_loc_getitem_list_of_labels_categoricalindex_with_na
  115. raise ValueError(na_msg)
  116. return False
  117. return True
  118. elif is_bool_dtype(key.dtype):
  119. return True
  120. elif isinstance(key, list):
  121. # check if np.array(key).dtype would be bool
  122. if len(key) > 0:
  123. if type(key) is not list: # noqa: E721
  124. # GH#42461 cython will raise TypeError if we pass a subclass
  125. key = list(key)
  126. return lib.is_bool_list(key)
  127. return False
  128. def cast_scalar_indexer(val):
  129. """
  130. Disallow indexing with a float key, even if that key is a round number.
  131. Parameters
  132. ----------
  133. val : scalar
  134. Returns
  135. -------
  136. outval : scalar
  137. """
  138. # assumes lib.is_scalar(val)
  139. if lib.is_float(val) and val.is_integer():
  140. raise IndexError(
  141. # GH#34193
  142. "Indexing with a float is no longer supported. Manually convert "
  143. "to an integer key instead."
  144. )
  145. return val
  146. def not_none(*args):
  147. """
  148. Returns a generator consisting of the arguments that are not None.
  149. """
  150. return (arg for arg in args if arg is not None)
  151. def any_none(*args) -> bool:
  152. """
  153. Returns a boolean indicating if any argument is None.
  154. """
  155. return any(arg is None for arg in args)
  156. def all_none(*args) -> bool:
  157. """
  158. Returns a boolean indicating if all arguments are None.
  159. """
  160. return all(arg is None for arg in args)
  161. def any_not_none(*args) -> bool:
  162. """
  163. Returns a boolean indicating if any argument is not None.
  164. """
  165. return any(arg is not None for arg in args)
  166. def all_not_none(*args) -> bool:
  167. """
  168. Returns a boolean indicating if all arguments are not None.
  169. """
  170. return all(arg is not None for arg in args)
  171. def count_not_none(*args) -> int:
  172. """
  173. Returns the count of arguments that are not None.
  174. """
  175. return sum(x is not None for x in args)
  176. @overload
  177. def asarray_tuplesafe(
  178. values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
  179. ) -> np.ndarray:
  180. # ExtensionArray can only be returned when values is an Index, all other iterables
  181. # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
  182. # signature, so instead we special-case some common types.
  183. ...
  184. @overload
  185. def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
  186. ...
  187. def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
  188. if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
  189. values = list(values)
  190. elif isinstance(values, ABCIndex):
  191. return values._values
  192. elif isinstance(values, ABCSeries):
  193. return values._values
  194. if isinstance(values, list) and dtype in [np.object_, object]:
  195. return construct_1d_object_array_from_listlike(values)
  196. try:
  197. with warnings.catch_warnings():
  198. # Can remove warning filter once NumPy 1.24 is min version
  199. if not np_version_gte1p24:
  200. warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
  201. result = np.asarray(values, dtype=dtype)
  202. except ValueError:
  203. # Using try/except since it's more performant than checking is_list_like
  204. # over each element
  205. # error: Argument 1 to "construct_1d_object_array_from_listlike"
  206. # has incompatible type "Iterable[Any]"; expected "Sized"
  207. return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type]
  208. if issubclass(result.dtype.type, str):
  209. result = np.asarray(values, dtype=object)
  210. if result.ndim == 2:
  211. # Avoid building an array of arrays:
  212. values = [tuple(x) for x in values]
  213. result = construct_1d_object_array_from_listlike(values)
  214. return result
  215. def index_labels_to_array(
  216. labels: np.ndarray | Iterable, dtype: NpDtype | None = None
  217. ) -> np.ndarray:
  218. """
  219. Transform label or iterable of labels to array, for use in Index.
  220. Parameters
  221. ----------
  222. dtype : dtype
  223. If specified, use as dtype of the resulting array, otherwise infer.
  224. Returns
  225. -------
  226. array
  227. """
  228. if isinstance(labels, (str, tuple)):
  229. labels = [labels]
  230. if not isinstance(labels, (list, np.ndarray)):
  231. try:
  232. labels = list(labels)
  233. except TypeError: # non-iterable
  234. labels = [labels]
  235. labels = asarray_tuplesafe(labels, dtype=dtype)
  236. return labels
  237. def maybe_make_list(obj):
  238. if obj is not None and not isinstance(obj, (tuple, list)):
  239. return [obj]
  240. return obj
  241. def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
  242. """
  243. If obj is Iterable but not list-like, consume into list.
  244. """
  245. if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
  246. return list(obj)
  247. obj = cast(Collection, obj)
  248. return obj
  249. def is_null_slice(obj) -> bool:
  250. """
  251. We have a null slice.
  252. """
  253. return (
  254. isinstance(obj, slice)
  255. and obj.start is None
  256. and obj.stop is None
  257. and obj.step is None
  258. )
  259. def is_empty_slice(obj) -> bool:
  260. """
  261. We have an empty slice, e.g. no values are selected.
  262. """
  263. return (
  264. isinstance(obj, slice)
  265. and obj.start is not None
  266. and obj.stop is not None
  267. and obj.start == obj.stop
  268. )
  269. def is_true_slices(line) -> list[bool]:
  270. """
  271. Find non-trivial slices in "line": return a list of booleans with same length.
  272. """
  273. return [isinstance(k, slice) and not is_null_slice(k) for k in line]
  274. # TODO: used only once in indexing; belongs elsewhere?
  275. def is_full_slice(obj, line: int) -> bool:
  276. """
  277. We have a full length slice.
  278. """
  279. return (
  280. isinstance(obj, slice)
  281. and obj.start == 0
  282. and obj.stop == line
  283. and obj.step is None
  284. )
  285. def get_callable_name(obj):
  286. # typical case has name
  287. if hasattr(obj, "__name__"):
  288. return getattr(obj, "__name__")
  289. # some objects don't; could recurse
  290. if isinstance(obj, partial):
  291. return get_callable_name(obj.func)
  292. # fall back to class name
  293. if callable(obj):
  294. return type(obj).__name__
  295. # everything failed (probably because the argument
  296. # wasn't actually callable); we return None
  297. # instead of the empty string in this case to allow
  298. # distinguishing between no name and a name of ''
  299. return None
  300. def apply_if_callable(maybe_callable, obj, **kwargs):
  301. """
  302. Evaluate possibly callable input using obj and kwargs if it is callable,
  303. otherwise return as it is.
  304. Parameters
  305. ----------
  306. maybe_callable : possibly a callable
  307. obj : NDFrame
  308. **kwargs
  309. """
  310. if callable(maybe_callable):
  311. return maybe_callable(obj, **kwargs)
  312. return maybe_callable
  313. def standardize_mapping(into):
  314. """
  315. Helper function to standardize a supplied mapping.
  316. Parameters
  317. ----------
  318. into : instance or subclass of collections.abc.Mapping
  319. Must be a class, an initialized collections.defaultdict,
  320. or an instance of a collections.abc.Mapping subclass.
  321. Returns
  322. -------
  323. mapping : a collections.abc.Mapping subclass or other constructor
  324. a callable object that can accept an iterator to create
  325. the desired Mapping.
  326. See Also
  327. --------
  328. DataFrame.to_dict
  329. Series.to_dict
  330. """
  331. if not inspect.isclass(into):
  332. if isinstance(into, defaultdict):
  333. return partial(defaultdict, into.default_factory)
  334. into = type(into)
  335. if not issubclass(into, abc.Mapping):
  336. raise TypeError(f"unsupported type: {into}")
  337. if into == defaultdict:
  338. raise TypeError("to_dict() only accepts initialized defaultdicts")
  339. return into
  340. @overload
  341. def random_state(state: np.random.Generator) -> np.random.Generator:
  342. ...
  343. @overload
  344. def random_state(
  345. state: int | np.ndarray | np.random.BitGenerator | np.random.RandomState | None,
  346. ) -> np.random.RandomState:
  347. ...
  348. def random_state(state: RandomState | None = None):
  349. """
  350. Helper function for processing random_state arguments.
  351. Parameters
  352. ----------
  353. state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
  354. If receives an int, array-like, or BitGenerator, passes to
  355. np.random.RandomState() as seed.
  356. If receives an np.random RandomState or Generator, just returns that unchanged.
  357. If receives `None`, returns np.random.
  358. If receives anything else, raises an informative ValueError.
  359. Default None.
  360. Returns
  361. -------
  362. np.random.RandomState or np.random.Generator. If state is None, returns np.random
  363. """
  364. if is_integer(state) or isinstance(state, (np.ndarray, np.random.BitGenerator)):
  365. return np.random.RandomState(state)
  366. elif isinstance(state, np.random.RandomState):
  367. return state
  368. elif isinstance(state, np.random.Generator):
  369. return state
  370. elif state is None:
  371. return np.random
  372. else:
  373. raise ValueError(
  374. "random_state must be an integer, array-like, a BitGenerator, Generator, "
  375. "a numpy RandomState, or None"
  376. )
  377. def pipe(
  378. obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
  379. ) -> T:
  380. """
  381. Apply a function ``func`` to object ``obj`` either by passing obj as the
  382. first argument to the function or, in the case that the func is a tuple,
  383. interpret the first element of the tuple as a function and pass the obj to
  384. that function as a keyword argument whose key is the value of the second
  385. element of the tuple.
  386. Parameters
  387. ----------
  388. func : callable or tuple of (callable, str)
  389. Function to apply to this object or, alternatively, a
  390. ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
  391. string indicating the keyword of ``callable`` that expects the
  392. object.
  393. *args : iterable, optional
  394. Positional arguments passed into ``func``.
  395. **kwargs : dict, optional
  396. A dictionary of keyword arguments passed into ``func``.
  397. Returns
  398. -------
  399. object : the return type of ``func``.
  400. """
  401. if isinstance(func, tuple):
  402. func, target = func
  403. if target in kwargs:
  404. msg = f"{target} is both the pipe target and a keyword argument"
  405. raise ValueError(msg)
  406. kwargs[target] = obj
  407. return func(*args, **kwargs)
  408. else:
  409. return func(obj, *args, **kwargs)
  410. def get_rename_function(mapper):
  411. """
  412. Returns a function that will map names/labels, dependent if mapper
  413. is a dict, Series or just a function.
  414. """
  415. def f(x):
  416. if x in mapper:
  417. return mapper[x]
  418. else:
  419. return x
  420. return f if isinstance(mapper, (abc.Mapping, ABCSeries)) else mapper
  421. def convert_to_list_like(
  422. values: Hashable | Iterable | AnyArrayLike,
  423. ) -> list | AnyArrayLike:
  424. """
  425. Convert list-like or scalar input to list-like. List, numpy and pandas array-like
  426. inputs are returned unmodified whereas others are converted to list.
  427. """
  428. if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)):
  429. return values
  430. elif isinstance(values, abc.Iterable) and not isinstance(values, str):
  431. return list(values)
  432. return [values]
  433. @contextlib.contextmanager
  434. def temp_setattr(
  435. obj, attr: str, value, condition: bool = True
  436. ) -> Generator[None, None, None]:
  437. """
  438. Temporarily set attribute on an object.
  439. Parameters
  440. ----------
  441. obj : object
  442. Object whose attribute will be modified.
  443. attr : str
  444. Attribute to modify.
  445. value : Any
  446. Value to temporarily set attribute to.
  447. condition : bool, default True
  448. Whether to set the attribute. Provided in order to not have to
  449. conditionally use this context manager.
  450. Yields
  451. ------
  452. object : obj with modified attribute.
  453. """
  454. if condition:
  455. old_value = getattr(obj, attr)
  456. setattr(obj, attr, value)
  457. try:
  458. yield obj
  459. finally:
  460. if condition:
  461. setattr(obj, attr, old_value)
  462. def require_length_match(data, index: Index) -> None:
  463. """
  464. Check the length of data matches the length of the index.
  465. """
  466. if len(data) != len(index):
  467. raise ValueError(
  468. "Length of values "
  469. f"({len(data)}) "
  470. "does not match length of index "
  471. f"({len(index)})"
  472. )
# Maps Python builtin reductions to NumPy callables; consulted by
# ``is_builtin_func``.
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}

# GH#53425: Only for deprecation
# Same keys as ``_builtin_table``, with each replacement spelled as a string.
_builtin_table_alias = {
    builtins.sum: "np.sum",
    builtins.max: "np.maximum.reduce",
    builtins.min: "np.minimum.reduce",
}

# Maps builtin and NumPy reduction/accumulation callables to a string name;
# consulted by ``get_cython_func``. Each ``np.nan*`` variant maps to the same
# name as its plain counterpart.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}
  514. def get_cython_func(arg: Callable) -> str | None:
  515. """
  516. if we define an internal function for this argument, return it
  517. """
  518. return _cython_table.get(arg)
  519. def is_builtin_func(arg):
  520. """
  521. if we define a builtin function for this argument, return it,
  522. otherwise return the arg
  523. """
  524. return _builtin_table.get(arg, arg)
  525. def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
  526. """
  527. If a name is missing then replace it by level_n, where n is the count
  528. .. versionadded:: 1.4.0
  529. Parameters
  530. ----------
  531. names : list-like
  532. list of column names or None values.
  533. Returns
  534. -------
  535. list
  536. list of column names with the None values replaced.
  537. """
  538. return [f"level_{i}" if name is None else name for i, name in enumerate(names)]