_validators.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. """
  2. Module that contains many useful utilities
  3. for validating data or function arguments
  4. """
  5. from __future__ import annotations
  6. from collections.abc import (
  7. Iterable,
  8. Sequence,
  9. )
  10. from typing import (
  11. TypeVar,
  12. overload,
  13. )
  14. import numpy as np
  15. from pandas._libs import lib
  16. from pandas.core.dtypes.common import (
  17. is_bool,
  18. is_integer,
  19. )
# Type variable constrained to ``bool`` or ``int``; used in this module to
# annotate validators that hand their argument back to the caller.
BoolishT = TypeVar("BoolishT", bool, int)
# Same constraints as ``BoolishT`` with ``None`` additionally permitted.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
  22. def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
  23. """
  24. Checks whether 'args' has length of at most 'compat_args'. Raises
  25. a TypeError if that is not the case, similar to in Python when a
  26. function is called with too many arguments.
  27. """
  28. if max_fname_arg_count < 0:
  29. raise ValueError("'max_fname_arg_count' must be non-negative")
  30. if len(args) > len(compat_args):
  31. max_arg_count = len(compat_args) + max_fname_arg_count
  32. actual_arg_count = len(args) + max_fname_arg_count
  33. argument = "argument" if max_arg_count == 1 else "arguments"
  34. raise TypeError(
  35. f"{fname}() takes at most {max_arg_count} {argument} "
  36. f"({actual_arg_count} given)"
  37. )
  38. def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
  39. """
  40. Check that the keys in `arg_val_dict` are mapped to their
  41. default values as specified in `compat_args`.
  42. Note that this function is to be called only when it has been
  43. checked that arg_val_dict.keys() is a subset of compat_args
  44. """
  45. for key in arg_val_dict:
  46. # try checking equality directly with '=' operator,
  47. # as comparison may have been overridden for the left
  48. # hand object
  49. try:
  50. v1 = arg_val_dict[key]
  51. v2 = compat_args[key]
  52. # check for None-ness otherwise we could end up
  53. # comparing a numpy array vs None
  54. if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
  55. match = False
  56. else:
  57. match = v1 == v2
  58. if not is_bool(match):
  59. raise ValueError("'match' is not a boolean")
  60. # could not compare them directly, so try comparison
  61. # using the 'is' operator
  62. except ValueError:
  63. match = arg_val_dict[key] is compat_args[key]
  64. if not match:
  65. raise ValueError(
  66. f"the '{key}' parameter is not supported in "
  67. f"the pandas implementation of {fname}()"
  68. )
  69. def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
  70. """
  71. Checks whether the length of the `*args` argument passed into a function
  72. has at most `len(compat_args)` arguments and whether or not all of these
  73. elements in `args` are set to their default values.
  74. Parameters
  75. ----------
  76. fname : str
  77. The name of the function being passed the `*args` parameter
  78. args : tuple
  79. The `*args` parameter passed into a function
  80. max_fname_arg_count : int
  81. The maximum number of arguments that the function `fname`
  82. can accept, excluding those in `args`. Used for displaying
  83. appropriate error messages. Must be non-negative.
  84. compat_args : dict
  85. A dictionary of keys and their associated default values.
  86. In order to accommodate buggy behaviour in some versions of `numpy`,
  87. where a signature displayed keyword arguments but then passed those
  88. arguments **positionally** internally when calling downstream
  89. implementations, a dict ensures that the original
  90. order of the keyword arguments is enforced.
  91. Raises
  92. ------
  93. TypeError
  94. If `args` contains more values than there are `compat_args`
  95. ValueError
  96. If `args` contains values that do not correspond to those
  97. of the default values specified in `compat_args`
  98. """
  99. _check_arg_length(fname, args, max_fname_arg_count, compat_args)
  100. # We do this so that we can provide a more informative
  101. # error message about the parameters that we are not
  102. # supporting in the pandas implementation of 'fname'
  103. kwargs = dict(zip(compat_args, args))
  104. _check_for_default_values(fname, kwargs, compat_args)
  105. def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
  106. """
  107. Checks whether 'kwargs' contains any keys that are not
  108. in 'compat_args' and raises a TypeError if there is one.
  109. """
  110. # set(dict) --> set of the dictionary's keys
  111. diff = set(kwargs) - set(compat_args)
  112. if diff:
  113. bad_arg = next(iter(diff))
  114. raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
  115. def validate_kwargs(fname, kwargs, compat_args) -> None:
  116. """
  117. Checks whether parameters passed to the **kwargs argument in a
  118. function `fname` are valid parameters as specified in `*compat_args`
  119. and whether or not they are set to their default values.
  120. Parameters
  121. ----------
  122. fname : str
  123. The name of the function being passed the `**kwargs` parameter
  124. kwargs : dict
  125. The `**kwargs` parameter passed into `fname`
  126. compat_args: dict
  127. A dictionary of keys that `kwargs` is allowed to have and their
  128. associated default values
  129. Raises
  130. ------
  131. TypeError if `kwargs` contains keys not in `compat_args`
  132. ValueError if `kwargs` contains keys in `compat_args` that do not
  133. map to the default values specified in `compat_args`
  134. """
  135. kwds = kwargs.copy()
  136. _check_for_invalid_keys(fname, kwargs, compat_args)
  137. _check_for_default_values(fname, kwds, compat_args)
  138. def validate_args_and_kwargs(
  139. fname, args, kwargs, max_fname_arg_count, compat_args
  140. ) -> None:
  141. """
  142. Checks whether parameters passed to the *args and **kwargs argument in a
  143. function `fname` are valid parameters as specified in `*compat_args`
  144. and whether or not they are set to their default values.
  145. Parameters
  146. ----------
  147. fname: str
  148. The name of the function being passed the `**kwargs` parameter
  149. args: tuple
  150. The `*args` parameter passed into a function
  151. kwargs: dict
  152. The `**kwargs` parameter passed into `fname`
  153. max_fname_arg_count: int
  154. The minimum number of arguments that the function `fname`
  155. requires, excluding those in `args`. Used for displaying
  156. appropriate error messages. Must be non-negative.
  157. compat_args: dict
  158. A dictionary of keys that `kwargs` is allowed to
  159. have and their associated default values.
  160. Raises
  161. ------
  162. TypeError if `args` contains more values than there are
  163. `compat_args` OR `kwargs` contains keys not in `compat_args`
  164. ValueError if `args` contains values not at the default value (`None`)
  165. `kwargs` contains keys in `compat_args` that do not map to the default
  166. value as specified in `compat_args`
  167. See Also
  168. --------
  169. validate_args : Purely args validation.
  170. validate_kwargs : Purely kwargs validation.
  171. """
  172. # Check that the total number of arguments passed in (i.e.
  173. # args and kwargs) does not exceed the length of compat_args
  174. _check_arg_length(
  175. fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
  176. )
  177. # Check there is no overlap with the positional and keyword
  178. # arguments, similar to what is done in actual Python functions
  179. args_dict = dict(zip(compat_args, args))
  180. for key in args_dict:
  181. if key in kwargs:
  182. raise TypeError(
  183. f"{fname}() got multiple values for keyword argument '{key}'"
  184. )
  185. kwargs.update(args_dict)
  186. validate_kwargs(fname, kwargs, compat_args)
  187. def validate_bool_kwarg(
  188. value: BoolishNoneT,
  189. arg_name: str,
  190. none_allowed: bool = True,
  191. int_allowed: bool = False,
  192. ) -> BoolishNoneT:
  193. """
  194. Ensure that argument passed in arg_name can be interpreted as boolean.
  195. Parameters
  196. ----------
  197. value : bool
  198. Value to be validated.
  199. arg_name : str
  200. Name of the argument. To be reflected in the error message.
  201. none_allowed : bool, default True
  202. Whether to consider None to be a valid boolean.
  203. int_allowed : bool, default False
  204. Whether to consider integer value to be a valid boolean.
  205. Returns
  206. -------
  207. value
  208. The same value as input.
  209. Raises
  210. ------
  211. ValueError
  212. If the value is not a valid boolean.
  213. """
  214. good_value = is_bool(value)
  215. if none_allowed:
  216. good_value = good_value or (value is None)
  217. if int_allowed:
  218. good_value = good_value or isinstance(value, int)
  219. if not good_value:
  220. raise ValueError(
  221. f'For argument "{arg_name}" expected type bool, received '
  222. f"type {type(value).__name__}."
  223. )
  224. return value # pyright: ignore[reportGeneralTypeIssues]
  225. def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
  226. """
  227. Validate the keyword arguments to 'fillna'.
  228. This checks that exactly one of 'value' and 'method' is specified.
  229. If 'method' is specified, this validates that it's a valid method.
  230. Parameters
  231. ----------
  232. value, method : object
  233. The 'value' and 'method' keyword arguments for 'fillna'.
  234. validate_scalar_dict_value : bool, default True
  235. Whether to validate that 'value' is a scalar or dict. Specifically,
  236. validate that it is not a list or tuple.
  237. Returns
  238. -------
  239. value, method : object
  240. """
  241. from pandas.core.missing import clean_fill_method
  242. if value is None and method is None:
  243. raise ValueError("Must specify a fill 'value' or 'method'.")
  244. if value is None and method is not None:
  245. method = clean_fill_method(method)
  246. elif value is not None and method is None:
  247. if validate_scalar_dict_value and isinstance(value, (list, tuple)):
  248. raise TypeError(
  249. '"value" parameter must be a scalar or dict, but '
  250. f'you passed a "{type(value).__name__}"'
  251. )
  252. elif value is not None and method is not None:
  253. raise ValueError("Cannot specify both 'value' and 'method'.")
  254. return value, method
  255. def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
  256. """
  257. Validate percentiles (used by describe and quantile).
  258. This function checks if the given float or iterable of floats is a valid percentile
  259. otherwise raises a ValueError.
  260. Parameters
  261. ----------
  262. q: float or iterable of floats
  263. A single percentile or an iterable of percentiles.
  264. Returns
  265. -------
  266. ndarray
  267. An ndarray of the percentiles if valid.
  268. Raises
  269. ------
  270. ValueError if percentiles are not in given interval([0, 1]).
  271. """
  272. q_arr = np.asarray(q)
  273. # Don't change this to an f-string. The string formatting
  274. # is too expensive for cases where we don't need it.
  275. msg = "percentiles should all be in the interval [0, 1]"
  276. if q_arr.ndim == 0:
  277. if not 0 <= q_arr <= 1:
  278. raise ValueError(msg)
  279. else:
  280. if not all(0 <= qs <= 1 for qs in q_arr):
  281. raise ValueError(msg)
  282. return q_arr
  283. @overload
  284. def validate_ascending(ascending: BoolishT) -> BoolishT:
  285. ...
  286. @overload
  287. def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
  288. ...
  289. def validate_ascending(
  290. ascending: bool | int | Sequence[BoolishT],
  291. ) -> bool | int | list[BoolishT]:
  292. """Validate ``ascending`` kwargs for ``sort_index`` method."""
  293. kwargs = {"none_allowed": False, "int_allowed": True}
  294. if not isinstance(ascending, Sequence):
  295. return validate_bool_kwarg(ascending, "ascending", **kwargs)
  296. return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
  297. def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
  298. """
  299. Check that the `closed` argument is among [None, "left", "right"]
  300. Parameters
  301. ----------
  302. closed : {None, "left", "right"}
  303. Returns
  304. -------
  305. left_closed : bool
  306. right_closed : bool
  307. Raises
  308. ------
  309. ValueError : if argument is not among valid values
  310. """
  311. left_closed = False
  312. right_closed = False
  313. if closed is None:
  314. left_closed = True
  315. right_closed = True
  316. elif closed == "left":
  317. left_closed = True
  318. elif closed == "right":
  319. right_closed = True
  320. else:
  321. raise ValueError("Closed has to be either 'left', 'right' or None")
  322. return left_closed, right_closed
  323. def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
  324. """
  325. Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
  326. Parameters
  327. ----------
  328. inclusive : {"both", "neither", "left", "right"}
  329. Returns
  330. -------
  331. left_right_inclusive : tuple[bool, bool]
  332. Raises
  333. ------
  334. ValueError : if argument is not among valid values
  335. """
  336. left_right_inclusive: tuple[bool, bool] | None = None
  337. if isinstance(inclusive, str):
  338. left_right_inclusive = {
  339. "both": (True, True),
  340. "left": (True, False),
  341. "right": (False, True),
  342. "neither": (False, False),
  343. }.get(inclusive)
  344. if left_right_inclusive is None:
  345. raise ValueError(
  346. "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
  347. )
  348. return left_right_inclusive
  349. def validate_insert_loc(loc: int, length: int) -> int:
  350. """
  351. Check that we have an integer between -length and length, inclusive.
  352. Standardize negative loc to within [0, length].
  353. The exceptions we raise on failure match np.insert.
  354. """
  355. if not is_integer(loc):
  356. raise TypeError(f"loc must be an integer between -{length} and {length}")
  357. if loc < 0:
  358. loc += length
  359. if not 0 <= loc <= length:
  360. raise IndexError(f"loc must be an integer between -{length} and {length}")
  361. return loc # pyright: ignore[reportGeneralTypeIssues]
  362. def check_dtype_backend(dtype_backend) -> None:
  363. if dtype_backend is not lib.no_default:
  364. if dtype_backend not in ["numpy_nullable", "pyarrow"]:
  365. raise ValueError(
  366. f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
  367. f"'pyarrow' are allowed.",
  368. )