_typing.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. from __future__ import annotations
  2. from collections.abc import (
  3. Hashable,
  4. Iterator,
  5. Mapping,
  6. MutableMapping,
  7. Sequence,
  8. )
  9. from datetime import (
  10. date,
  11. datetime,
  12. timedelta,
  13. tzinfo,
  14. )
  15. from os import PathLike
  16. import sys
  17. from typing import (
  18. TYPE_CHECKING,
  19. Any,
  20. Callable,
  21. Literal,
  22. Optional,
  23. Protocol,
  24. Type as type_t,
  25. TypeVar,
  26. Union,
  27. overload,
  28. )
  29. import numpy as np
  30. # To prevent import cycles place any internal imports in the branch below
  31. # and use a string literal forward reference to it in subsequent types
  32. # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
  33. if TYPE_CHECKING:
  34. import numpy.typing as npt
  35. from pandas._libs import (
  36. NaTType,
  37. Period,
  38. Timedelta,
  39. Timestamp,
  40. )
  41. from pandas._libs.tslibs import BaseOffset
  42. from pandas.core.dtypes.dtypes import ExtensionDtype
  43. from pandas import Interval
  44. from pandas.arrays import (
  45. DatetimeArray,
  46. TimedeltaArray,
  47. )
  48. from pandas.core.arrays.base import ExtensionArray
  49. from pandas.core.frame import DataFrame
  50. from pandas.core.generic import NDFrame
  51. from pandas.core.groupby.generic import (
  52. DataFrameGroupBy,
  53. GroupBy,
  54. SeriesGroupBy,
  55. )
  56. from pandas.core.indexes.base import Index
  57. from pandas.core.internals import (
  58. ArrayManager,
  59. BlockManager,
  60. SingleArrayManager,
  61. SingleBlockManager,
  62. )
  63. from pandas.core.resample import Resampler
  64. from pandas.core.series import Series
  65. from pandas.core.window.rolling import BaseWindow
  66. from pandas.io.formats.format import EngFormatter
  67. from pandas.tseries.holiday import AbstractHolidayCalendar
  68. ScalarLike_co = Union[
  69. int,
  70. float,
  71. complex,
  72. str,
  73. bytes,
  74. np.generic,
  75. ]
  76. # numpy compatible types
  77. NumpyValueArrayLike = Union[ScalarLike_co, npt.ArrayLike]
  78. # Name "npt._ArrayLikeInt_co" is not defined [name-defined]
  79. NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined]
  80. from typing import SupportsIndex
  81. if sys.version_info >= (3, 10):
  82. from typing import TypeGuard # pyright: ignore[reportUnusedImport]
  83. else:
  84. from typing_extensions import TypeGuard # pyright: ignore[reportUnusedImport]
  85. if sys.version_info >= (3, 11):
  86. from typing import Self # pyright: ignore[reportUnusedImport]
  87. else:
  88. from typing_extensions import Self # pyright: ignore[reportUnusedImport]
  89. else:
  90. npt: Any = None
  91. Self: Any = None
  92. TypeGuard: Any = None
  93. HashableT = TypeVar("HashableT", bound=Hashable)
  94. MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
  95. # array-like
  96. ArrayLike = Union["ExtensionArray", np.ndarray]
  97. AnyArrayLike = Union[ArrayLike, "Index", "Series"]
  98. TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
  99. # list-like
  100. # from https://github.com/hauntsaninja/useful_types
  101. # includes Sequence-like objects but excludes str and bytes
  102. _T_co = TypeVar("_T_co", covariant=True)
  103. class SequenceNotStr(Protocol[_T_co]):
  104. @overload
  105. def __getitem__(self, index: SupportsIndex, /) -> _T_co:
  106. ...
  107. @overload
  108. def __getitem__(self, index: slice, /) -> Sequence[_T_co]:
  109. ...
  110. def __contains__(self, value: object, /) -> bool:
  111. ...
  112. def __len__(self) -> int:
  113. ...
  114. def __iter__(self) -> Iterator[_T_co]:
  115. ...
  116. def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
  117. ...
  118. def count(self, value: Any, /) -> int:
  119. ...
  120. def __reversed__(self) -> Iterator[_T_co]:
  121. ...
  122. ListLike = Union[AnyArrayLike, SequenceNotStr, range]
  123. # scalars
  124. PythonScalar = Union[str, float, bool]
  125. DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
  126. PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
  127. Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, date]
  128. IntStrT = TypeVar("IntStrT", bound=Union[int, str])
  129. # timestamp and timedelta convertible types
  130. TimestampConvertibleTypes = Union[
  131. "Timestamp", date, np.datetime64, np.int64, float, str
  132. ]
  133. TimestampNonexistent = Union[
  134. Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
  135. ]
  136. TimedeltaConvertibleTypes = Union[
  137. "Timedelta", timedelta, np.timedelta64, np.int64, float, str
  138. ]
  139. Timezone = Union[str, tzinfo]
  140. ToTimestampHow = Literal["s", "e", "start", "end"]
  141. # NDFrameT is stricter and ensures that the same subclass of NDFrame always is
  142. # used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
  143. # Series is passed into a function, a Series is always returned and if a DataFrame is
  144. # passed in, a DataFrame is always returned.
  145. NDFrameT = TypeVar("NDFrameT", bound="NDFrame")
  146. NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")
  147. AxisInt = int
  148. Axis = Union[AxisInt, Literal["index", "columns", "rows"]]
  149. IndexLabel = Union[Hashable, Sequence[Hashable]]
  150. Level = Hashable
  151. Shape = tuple[int, ...]
  152. Suffixes = tuple[Optional[str], Optional[str]]
  153. Ordered = Optional[bool]
  154. JSONSerializable = Optional[Union[PythonScalar, list, dict]]
  155. Frequency = Union[str, "BaseOffset"]
  156. Axes = ListLike
  157. RandomState = Union[
  158. int,
  159. np.ndarray,
  160. np.random.Generator,
  161. np.random.BitGenerator,
  162. np.random.RandomState,
  163. ]
  164. # dtypes
  165. NpDtype = Union[str, np.dtype, type_t[Union[str, complex, bool, object]]]
  166. Dtype = Union["ExtensionDtype", NpDtype]
  167. AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"]
  168. # DtypeArg specifies all allowable dtypes in a functions its dtype argument
  169. DtypeArg = Union[Dtype, dict[Hashable, Dtype]]
  170. DtypeObj = Union[np.dtype, "ExtensionDtype"]
  171. # converters
  172. ConvertersArg = dict[Hashable, Callable[[Dtype], Dtype]]
  173. # parse_dates
  174. ParseDatesArg = Union[
  175. bool, list[Hashable], list[list[Hashable]], dict[Hashable, list[Hashable]]
  176. ]
  177. # For functions like rename that convert one label to another
  178. Renamer = Union[Mapping[Any, Hashable], Callable[[Any], Hashable]]
  179. # to maintain type information across generic functions and parametrization
  180. T = TypeVar("T")
  181. # used in decorators to preserve the signature of the function it decorates
  182. # see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
  183. FuncType = Callable[..., Any]
  184. F = TypeVar("F", bound=FuncType)
  185. # types of vectorized key functions for DataFrame::sort_values and
  186. # DataFrame::sort_index, among others
  187. ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]]
  188. IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
  189. # types of `func` kwarg for DataFrame.aggregate and Series.aggregate
  190. AggFuncTypeBase = Union[Callable, str]
  191. AggFuncTypeDict = MutableMapping[
  192. Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]
  193. ]
  194. AggFuncType = Union[
  195. AggFuncTypeBase,
  196. list[AggFuncTypeBase],
  197. AggFuncTypeDict,
  198. ]
  199. AggObjType = Union[
  200. "Series",
  201. "DataFrame",
  202. "GroupBy",
  203. "SeriesGroupBy",
  204. "DataFrameGroupBy",
  205. "BaseWindow",
  206. "Resampler",
  207. ]
  208. PythonFuncType = Callable[[Any], Any]
  209. # filenames and file-like-objects
  210. AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
  211. AnyStr_contra = TypeVar("AnyStr_contra", str, bytes, contravariant=True)
  212. class BaseBuffer(Protocol):
  213. @property
  214. def mode(self) -> str:
  215. # for _get_filepath_or_buffer
  216. ...
  217. def seek(self, __offset: int, __whence: int = ...) -> int:
  218. # with one argument: gzip.GzipFile, bz2.BZ2File
  219. # with two arguments: zip.ZipFile, read_sas
  220. ...
  221. def seekable(self) -> bool:
  222. # for bz2.BZ2File
  223. ...
  224. def tell(self) -> int:
  225. # for zip.ZipFile, read_stata, to_stata
  226. ...
  227. class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
  228. def read(self, __n: int = ...) -> AnyStr_co:
  229. # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
  230. ...
  231. class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
  232. def write(self, __b: AnyStr_contra) -> Any:
  233. # for gzip.GzipFile, bz2.BZ2File
  234. ...
  235. def flush(self) -> Any:
  236. # for gzip.GzipFile, bz2.BZ2File
  237. ...
  238. class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
  239. def readline(self) -> bytes:
  240. ...
  241. class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
  242. def truncate(self, size: int | None = ...) -> int:
  243. ...
  244. class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):
  245. def __iter__(self) -> Iterator[AnyStr_co]:
  246. # for engine=python
  247. ...
  248. def fileno(self) -> int:
  249. # for _MMapWrapper
  250. ...
  251. def readline(self) -> AnyStr_co:
  252. # for engine=python
  253. ...
  254. @property
  255. def closed(self) -> bool:
  256. # for enine=pyarrow
  257. ...
  258. FilePath = Union[str, "PathLike[str]"]
  259. # for arbitrary kwargs passed during reading/writing files
  260. StorageOptions = Optional[dict[str, Any]]
  261. # compression keywords and compression
  262. CompressionDict = dict[str, Any]
  263. CompressionOptions = Optional[
  264. Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict]
  265. ]
  266. # types in DataFrameFormatter
  267. FormattersType = Union[
  268. list[Callable], tuple[Callable, ...], Mapping[Union[str, int], Callable]
  269. ]
  270. ColspaceType = Mapping[Hashable, Union[str, int]]
  271. FloatFormatType = Union[str, Callable, "EngFormatter"]
  272. ColspaceArgType = Union[
  273. str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
  274. ]
  275. # Arguments for fillna()
  276. FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
  277. InterpolateOptions = Literal[
  278. "linear",
  279. "time",
  280. "index",
  281. "values",
  282. "nearest",
  283. "zero",
  284. "slinear",
  285. "quadratic",
  286. "cubic",
  287. "barycentric",
  288. "polynomial",
  289. "krogh",
  290. "piecewise_polynomial",
  291. "spline",
  292. "pchip",
  293. "akima",
  294. "cubicspline",
  295. "from_derivatives",
  296. ]
  297. # internals
  298. Manager = Union[
  299. "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
  300. ]
  301. SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
  302. Manager2D = Union["ArrayManager", "BlockManager"]
  303. # indexing
  304. # PositionalIndexer -> valid 1D positional indexer, e.g. can pass
  305. # to ndarray.__getitem__
  306. # ScalarIndexer is for a single value as the index
  307. # SequenceIndexer is for list like or slices (but not tuples)
  308. # PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays
  309. # These are used in various __getitem__ overloads
  310. # TODO(typing#684): add Ellipsis, see
  311. # https://github.com/python/typing/issues/684#issuecomment-548203158
  312. # https://bugs.python.org/issue41810
  313. # Using List[int] here rather than Sequence[int] to disallow tuples.
  314. ScalarIndexer = Union[int, np.integer]
  315. SequenceIndexer = Union[slice, list[int], np.ndarray]
  316. PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
  317. PositionalIndexerTuple = tuple[PositionalIndexer, PositionalIndexer]
  318. PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
  319. if TYPE_CHECKING:
  320. TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]]
  321. else:
  322. TakeIndexer = Any
  323. # Shared by functions such as drop and astype
  324. IgnoreRaise = Literal["ignore", "raise"]
  325. # Windowing rank methods
  326. WindowingRankType = Literal["average", "min", "max"]
  327. # read_csv engines
  328. CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
  329. # read_json engines
  330. JSONEngine = Literal["ujson", "pyarrow"]
  331. # read_xml parsers
  332. XMLParsers = Literal["lxml", "etree"]
  333. # read_html flavors
  334. HTMLFlavors = Literal["lxml", "html5lib", "bs4"]
  335. # Interval closed type
  336. IntervalLeftRight = Literal["left", "right"]
  337. IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]
  338. # datetime and NaTType
  339. DatetimeNaTType = Union[datetime, "NaTType"]
  340. DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]]
  341. # sort_index
  342. SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
  343. NaPosition = Literal["first", "last"]
  344. # Arguments for nsmalles and n_largest
  345. NsmallestNlargestKeep = Literal["first", "last", "all"]
  346. # quantile interpolation
  347. QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]
  348. # plotting
  349. PlottingOrientation = Literal["horizontal", "vertical"]
  350. # dropna
  351. AnyAll = Literal["any", "all"]
  352. # merge
  353. MergeHow = Literal["left", "right", "inner", "outer", "cross"]
  354. MergeValidate = Literal[
  355. "one_to_one",
  356. "1:1",
  357. "one_to_many",
  358. "1:m",
  359. "many_to_one",
  360. "m:1",
  361. "many_to_many",
  362. "m:m",
  363. ]
  364. # join
  365. JoinHow = Literal["left", "right", "inner", "outer"]
  366. JoinValidate = Literal[
  367. "one_to_one",
  368. "1:1",
  369. "one_to_many",
  370. "1:m",
  371. "many_to_one",
  372. "m:1",
  373. "many_to_many",
  374. "m:m",
  375. ]
  376. # reindex
  377. ReindexMethod = Union[FillnaOptions, Literal["nearest"]]
  378. MatplotlibColor = Union[str, Sequence[float]]
  379. TimeGrouperOrigin = Union[
  380. "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
  381. ]
  382. TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"]
  383. TimeNonexistent = Union[
  384. Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
  385. ]
  386. DropKeep = Literal["first", "last", False]
  387. CorrelationMethod = Union[
  388. Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float]
  389. ]
  390. AlignJoin = Literal["outer", "inner", "left", "right"]
  391. DtypeBackend = Literal["pyarrow", "numpy_nullable"]
  392. TimeUnit = Literal["s", "ms", "us", "ns"]
  393. OpenFileErrors = Literal[
  394. "strict",
  395. "ignore",
  396. "replace",
  397. "surrogateescape",
  398. "xmlcharrefreplace",
  399. "backslashreplace",
  400. "namereplace",
  401. ]
  402. # update
  403. UpdateJoin = Literal["left"]
  404. # applymap
  405. NaAction = Literal["ignore"]
  406. # from_dict
  407. FromDictOrient = Literal["columns", "index", "tight"]
  408. # to_gbc
  409. ToGbqIfexist = Literal["fail", "replace", "append"]
  410. # to_stata
  411. ToStataByteorder = Literal[">", "<", "little", "big"]
  412. # ExcelWriter
  413. ExcelWriterIfSheetExists = Literal["error", "new", "replace", "overlay"]
  414. # Offsets
  415. OffsetCalendar = Union[np.busdaycalendar, "AbstractHolidayCalendar"]
  416. # read_csv: usecols
  417. UsecolsArgType = Union[
  418. SequenceNotStr[Hashable],
  419. range,
  420. AnyArrayLike,
  421. Callable[[HashableT], bool],
  422. None,
  423. ]