# hashtable.pyi (8.0 KB)
  1. from collections.abc import Hashable
  2. from typing import (
  3. Any,
  4. Literal,
  5. overload,
  6. )
  7. import numpy as np
  8. from pandas._typing import npt
  9. def unique_label_indices(
  10. labels: np.ndarray, # const int64_t[:]
  11. ) -> np.ndarray: ...
  12. class Factorizer:
  13. count: int
  14. uniques: Any
  15. def __init__(self, size_hint: int, uses_mask: bool = False) -> None: ...
  16. def get_count(self) -> int: ...
  17. def factorize(
  18. self,
  19. values: np.ndarray,
  20. na_sentinel=...,
  21. na_value=...,
  22. mask=...,
  23. ) -> npt.NDArray[np.intp]: ...
  24. def hash_inner_join(
  25. self, values: np.ndarray, mask=...
  26. ) -> tuple[np.ndarray, np.ndarray]: ...
  27. class ObjectFactorizer(Factorizer):
  28. table: PyObjectHashTable
  29. uniques: ObjectVector
  30. class Int64Factorizer(Factorizer):
  31. table: Int64HashTable
  32. uniques: Int64Vector
  33. class UInt64Factorizer(Factorizer):
  34. table: UInt64HashTable
  35. uniques: UInt64Vector
  36. class Int32Factorizer(Factorizer):
  37. table: Int32HashTable
  38. uniques: Int32Vector
  39. class UInt32Factorizer(Factorizer):
  40. table: UInt32HashTable
  41. uniques: UInt32Vector
  42. class Int16Factorizer(Factorizer):
  43. table: Int16HashTable
  44. uniques: Int16Vector
  45. class UInt16Factorizer(Factorizer):
  46. table: UInt16HashTable
  47. uniques: UInt16Vector
  48. class Int8Factorizer(Factorizer):
  49. table: Int8HashTable
  50. uniques: Int8Vector
  51. class UInt8Factorizer(Factorizer):
  52. table: UInt8HashTable
  53. uniques: UInt8Vector
  54. class Float64Factorizer(Factorizer):
  55. table: Float64HashTable
  56. uniques: Float64Vector
  57. class Float32Factorizer(Factorizer):
  58. table: Float32HashTable
  59. uniques: Float32Vector
  60. class Complex64Factorizer(Factorizer):
  61. table: Complex64HashTable
  62. uniques: Complex64Vector
  63. class Complex128Factorizer(Factorizer):
  64. table: Complex128HashTable
  65. uniques: Complex128Vector
  66. class Int64Vector:
  67. def __init__(self, *args) -> None: ...
  68. def __len__(self) -> int: ...
  69. def to_array(self) -> npt.NDArray[np.int64]: ...
  70. class Int32Vector:
  71. def __init__(self, *args) -> None: ...
  72. def __len__(self) -> int: ...
  73. def to_array(self) -> npt.NDArray[np.int32]: ...
  74. class Int16Vector:
  75. def __init__(self, *args) -> None: ...
  76. def __len__(self) -> int: ...
  77. def to_array(self) -> npt.NDArray[np.int16]: ...
  78. class Int8Vector:
  79. def __init__(self, *args) -> None: ...
  80. def __len__(self) -> int: ...
  81. def to_array(self) -> npt.NDArray[np.int8]: ...
  82. class UInt64Vector:
  83. def __init__(self, *args) -> None: ...
  84. def __len__(self) -> int: ...
  85. def to_array(self) -> npt.NDArray[np.uint64]: ...
  86. class UInt32Vector:
  87. def __init__(self, *args) -> None: ...
  88. def __len__(self) -> int: ...
  89. def to_array(self) -> npt.NDArray[np.uint32]: ...
  90. class UInt16Vector:
  91. def __init__(self, *args) -> None: ...
  92. def __len__(self) -> int: ...
  93. def to_array(self) -> npt.NDArray[np.uint16]: ...
  94. class UInt8Vector:
  95. def __init__(self, *args) -> None: ...
  96. def __len__(self) -> int: ...
  97. def to_array(self) -> npt.NDArray[np.uint8]: ...
  98. class Float64Vector:
  99. def __init__(self, *args) -> None: ...
  100. def __len__(self) -> int: ...
  101. def to_array(self) -> npt.NDArray[np.float64]: ...
  102. class Float32Vector:
  103. def __init__(self, *args) -> None: ...
  104. def __len__(self) -> int: ...
  105. def to_array(self) -> npt.NDArray[np.float32]: ...
  106. class Complex128Vector:
  107. def __init__(self, *args) -> None: ...
  108. def __len__(self) -> int: ...
  109. def to_array(self) -> npt.NDArray[np.complex128]: ...
  110. class Complex64Vector:
  111. def __init__(self, *args) -> None: ...
  112. def __len__(self) -> int: ...
  113. def to_array(self) -> npt.NDArray[np.complex64]: ...
  114. class StringVector:
  115. def __init__(self, *args) -> None: ...
  116. def __len__(self) -> int: ...
  117. def to_array(self) -> npt.NDArray[np.object_]: ...
  118. class ObjectVector:
  119. def __init__(self, *args) -> None: ...
  120. def __len__(self) -> int: ...
  121. def to_array(self) -> npt.NDArray[np.object_]: ...
  122. class HashTable:
  123. # NB: The base HashTable class does _not_ actually have these methods;
  124. # we are putting them here for the sake of mypy to avoid
  125. # reproducing them in each subclass below.
  126. def __init__(self, size_hint: int = ..., uses_mask: bool = ...) -> None: ...
  127. def __len__(self) -> int: ...
  128. def __contains__(self, key: Hashable) -> bool: ...
  129. def sizeof(self, deep: bool = ...) -> int: ...
  130. def get_state(self) -> dict[str, int]: ...
  131. # TODO: `val/key` type is subclass-specific
  132. def get_item(self, val): ... # TODO: return type?
  133. def set_item(self, key, val) -> None: ...
  134. def get_na(self): ... # TODO: return type?
  135. def set_na(self, val) -> None: ...
  136. def map_locations(
  137. self,
  138. values: np.ndarray, # np.ndarray[subclass-specific]
  139. mask: npt.NDArray[np.bool_] | None = ...,
  140. ) -> None: ...
  141. def lookup(
  142. self,
  143. values: np.ndarray, # np.ndarray[subclass-specific]
  144. mask: npt.NDArray[np.bool_] | None = ...,
  145. ) -> npt.NDArray[np.intp]: ...
  146. def get_labels(
  147. self,
  148. values: np.ndarray, # np.ndarray[subclass-specific]
  149. uniques, # SubclassTypeVector
  150. count_prior: int = ...,
  151. na_sentinel: int = ...,
  152. na_value: object = ...,
  153. mask=...,
  154. ) -> npt.NDArray[np.intp]: ...
  155. @overload
  156. def unique(
  157. self,
  158. values: np.ndarray, # np.ndarray[subclass-specific]
  159. *,
  160. return_inverse: Literal[False] = ...,
  161. mask: None = ...,
  162. ) -> np.ndarray: ... # np.ndarray[subclass-specific]
  163. @overload
  164. def unique(
  165. self,
  166. values: np.ndarray, # np.ndarray[subclass-specific]
  167. *,
  168. return_inverse: Literal[True],
  169. mask: None = ...,
  170. ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]
  171. @overload
  172. def unique(
  173. self,
  174. values: np.ndarray, # np.ndarray[subclass-specific]
  175. *,
  176. return_inverse: Literal[False] = ...,
  177. mask: npt.NDArray[np.bool_],
  178. ) -> tuple[
  179. np.ndarray,
  180. npt.NDArray[np.bool_],
  181. ]: ... # np.ndarray[subclass-specific]
  182. def factorize(
  183. self,
  184. values: np.ndarray, # np.ndarray[subclass-specific]
  185. na_sentinel: int = ...,
  186. na_value: object = ...,
  187. mask=...,
  188. ignore_na: bool = True,
  189. ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]
  190. def hash_inner_join(
  191. self, values: np.ndarray, mask=...
  192. ) -> tuple[np.ndarray, np.ndarray]: ...
  193. class Complex128HashTable(HashTable): ...
  194. class Complex64HashTable(HashTable): ...
  195. class Float64HashTable(HashTable): ...
  196. class Float32HashTable(HashTable): ...
  197. class Int64HashTable(HashTable):
  198. # Only Int64HashTable has get_labels_groupby, map_keys_to_values
  199. def get_labels_groupby(
  200. self,
  201. values: npt.NDArray[np.int64], # const int64_t[:]
  202. ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: ...
  203. def map_keys_to_values(
  204. self,
  205. keys: npt.NDArray[np.int64],
  206. values: npt.NDArray[np.int64], # const int64_t[:]
  207. ) -> None: ...
  208. class Int32HashTable(HashTable): ...
  209. class Int16HashTable(HashTable): ...
  210. class Int8HashTable(HashTable): ...
  211. class UInt64HashTable(HashTable): ...
  212. class UInt32HashTable(HashTable): ...
  213. class UInt16HashTable(HashTable): ...
  214. class UInt8HashTable(HashTable): ...
  215. class StringHashTable(HashTable): ...
  216. class PyObjectHashTable(HashTable): ...
  217. class IntpHashTable(HashTable): ...
  218. def duplicated(
  219. values: np.ndarray,
  220. keep: Literal["last", "first", False] = ...,
  221. mask: npt.NDArray[np.bool_] | None = ...,
  222. ) -> npt.NDArray[np.bool_]: ...
  223. def mode(
  224. values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = ...
  225. ) -> np.ndarray: ...
  226. def value_count(
  227. values: np.ndarray,
  228. dropna: bool,
  229. mask: npt.NDArray[np.bool_] | None = ...,
  230. ) -> tuple[np.ndarray, npt.NDArray[np.int64], int]: ... # np.ndarray[same-as-values]
  231. # arr and values should have same dtype
  232. def ismember(
  233. arr: np.ndarray,
  234. values: np.ndarray,
  235. ) -> npt.NDArray[np.bool_]: ...
  236. def object_hash(obj) -> int: ...
  237. def objects_are_equal(a, b) -> bool: ...