test_lib.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. import pickle
  2. import numpy as np
  3. import pytest
  4. from pandas._libs import (
  5. Timedelta,
  6. lib,
  7. writers as libwriters,
  8. )
  9. from pandas.compat import IS64
  10. from pandas import Index
  11. import pandas._testing as tm
  12. class TestMisc:
  13. def test_max_len_string_array(self):
  14. arr = a = np.array(["foo", "b", np.nan], dtype="object")
  15. assert libwriters.max_len_string_array(arr) == 3
  16. # unicode
  17. arr = a.astype("U").astype(object)
  18. assert libwriters.max_len_string_array(arr) == 3
  19. # bytes for python3
  20. arr = a.astype("S").astype(object)
  21. assert libwriters.max_len_string_array(arr) == 3
  22. # raises
  23. msg = "No matching signature found"
  24. with pytest.raises(TypeError, match=msg):
  25. libwriters.max_len_string_array(arr.astype("U"))
  26. def test_fast_unique_multiple_list_gen_sort(self):
  27. keys = [["p", "a"], ["n", "d"], ["a", "s"]]
  28. gen = (key for key in keys)
  29. expected = np.array(["a", "d", "n", "p", "s"])
  30. out = lib.fast_unique_multiple_list_gen(gen, sort=True)
  31. tm.assert_numpy_array_equal(np.array(out), expected)
  32. gen = (key for key in keys)
  33. expected = np.array(["p", "a", "n", "d", "s"])
  34. out = lib.fast_unique_multiple_list_gen(gen, sort=False)
  35. tm.assert_numpy_array_equal(np.array(out), expected)
  36. def test_fast_multiget_timedelta_resos(self):
  37. # This will become relevant for test_constructor_dict_timedelta64_index
  38. # once Timedelta constructor preserves reso when passed a
  39. # np.timedelta64 object
  40. td = Timedelta(days=1)
  41. mapping1 = {td: 1}
  42. mapping2 = {td.as_unit("s"): 1}
  43. oindex = Index([td * n for n in range(3)])._values.astype(object)
  44. expected = lib.fast_multiget(mapping1, oindex)
  45. result = lib.fast_multiget(mapping2, oindex)
  46. tm.assert_numpy_array_equal(result, expected)
  47. # case that can't be cast to td64ns
  48. td = Timedelta(np.timedelta64(146000, "D"))
  49. assert hash(td) == hash(td.as_unit("ms"))
  50. assert hash(td) == hash(td.as_unit("us"))
  51. mapping1 = {td: 1}
  52. mapping2 = {td.as_unit("ms"): 1}
  53. oindex = Index([td * n for n in range(3)])._values.astype(object)
  54. expected = lib.fast_multiget(mapping1, oindex)
  55. result = lib.fast_multiget(mapping2, oindex)
  56. tm.assert_numpy_array_equal(result, expected)
  57. class TestIndexing:
  58. def test_maybe_indices_to_slice_left_edge(self):
  59. target = np.arange(100)
  60. # slice
  61. indices = np.array([], dtype=np.intp)
  62. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  63. assert isinstance(maybe_slice, slice)
  64. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  65. @pytest.mark.parametrize("end", [1, 2, 5, 20, 99])
  66. @pytest.mark.parametrize("step", [1, 2, 4])
  67. def test_maybe_indices_to_slice_left_edge_not_slice_end_steps(self, end, step):
  68. target = np.arange(100)
  69. indices = np.arange(0, end, step, dtype=np.intp)
  70. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  71. assert isinstance(maybe_slice, slice)
  72. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  73. # reverse
  74. indices = indices[::-1]
  75. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  76. assert isinstance(maybe_slice, slice)
  77. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  78. @pytest.mark.parametrize(
  79. "case", [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]
  80. )
  81. def test_maybe_indices_to_slice_left_edge_not_slice(self, case):
  82. # not slice
  83. target = np.arange(100)
  84. indices = np.array(case, dtype=np.intp)
  85. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  86. assert not isinstance(maybe_slice, slice)
  87. tm.assert_numpy_array_equal(maybe_slice, indices)
  88. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  89. @pytest.mark.parametrize("start", [0, 2, 5, 20, 97, 98])
  90. @pytest.mark.parametrize("step", [1, 2, 4])
  91. def test_maybe_indices_to_slice_right_edge(self, start, step):
  92. target = np.arange(100)
  93. # slice
  94. indices = np.arange(start, 99, step, dtype=np.intp)
  95. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  96. assert isinstance(maybe_slice, slice)
  97. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  98. # reverse
  99. indices = indices[::-1]
  100. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  101. assert isinstance(maybe_slice, slice)
  102. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  103. def test_maybe_indices_to_slice_right_edge_not_slice(self):
  104. # not slice
  105. target = np.arange(100)
  106. indices = np.array([97, 98, 99, 100], dtype=np.intp)
  107. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  108. assert not isinstance(maybe_slice, slice)
  109. tm.assert_numpy_array_equal(maybe_slice, indices)
  110. msg = "index 100 is out of bounds for axis (0|1) with size 100"
  111. with pytest.raises(IndexError, match=msg):
  112. target[indices]
  113. with pytest.raises(IndexError, match=msg):
  114. target[maybe_slice]
  115. indices = np.array([100, 99, 98, 97], dtype=np.intp)
  116. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  117. assert not isinstance(maybe_slice, slice)
  118. tm.assert_numpy_array_equal(maybe_slice, indices)
  119. with pytest.raises(IndexError, match=msg):
  120. target[indices]
  121. with pytest.raises(IndexError, match=msg):
  122. target[maybe_slice]
  123. @pytest.mark.parametrize(
  124. "case", [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]
  125. )
  126. def test_maybe_indices_to_slice_right_edge_cases(self, case):
  127. target = np.arange(100)
  128. indices = np.array(case, dtype=np.intp)
  129. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  130. assert not isinstance(maybe_slice, slice)
  131. tm.assert_numpy_array_equal(maybe_slice, indices)
  132. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  133. @pytest.mark.parametrize("step", [1, 2, 4, 5, 8, 9])
  134. def test_maybe_indices_to_slice_both_edges(self, step):
  135. target = np.arange(10)
  136. # slice
  137. indices = np.arange(0, 9, step, dtype=np.intp)
  138. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  139. assert isinstance(maybe_slice, slice)
  140. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  141. # reverse
  142. indices = indices[::-1]
  143. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  144. assert isinstance(maybe_slice, slice)
  145. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  146. @pytest.mark.parametrize("case", [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]])
  147. def test_maybe_indices_to_slice_both_edges_not_slice(self, case):
  148. # not slice
  149. target = np.arange(10)
  150. indices = np.array(case, dtype=np.intp)
  151. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  152. assert not isinstance(maybe_slice, slice)
  153. tm.assert_numpy_array_equal(maybe_slice, indices)
  154. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  155. @pytest.mark.parametrize("start, end", [(2, 10), (5, 25), (65, 97)])
  156. @pytest.mark.parametrize("step", [1, 2, 4, 20])
  157. def test_maybe_indices_to_slice_middle(self, start, end, step):
  158. target = np.arange(100)
  159. # slice
  160. indices = np.arange(start, end, step, dtype=np.intp)
  161. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  162. assert isinstance(maybe_slice, slice)
  163. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  164. # reverse
  165. indices = indices[::-1]
  166. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  167. assert isinstance(maybe_slice, slice)
  168. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  169. @pytest.mark.parametrize(
  170. "case", [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]
  171. )
  172. def test_maybe_indices_to_slice_middle_not_slice(self, case):
  173. # not slice
  174. target = np.arange(100)
  175. indices = np.array(case, dtype=np.intp)
  176. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  177. assert not isinstance(maybe_slice, slice)
  178. tm.assert_numpy_array_equal(maybe_slice, indices)
  179. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  180. def test_maybe_booleans_to_slice(self):
  181. arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8)
  182. result = lib.maybe_booleans_to_slice(arr)
  183. assert result.dtype == np.bool_
  184. result = lib.maybe_booleans_to_slice(arr[:0])
  185. assert result == slice(0, 0)
  186. def test_get_reverse_indexer(self):
  187. indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp)
  188. result = lib.get_reverse_indexer(indexer, 5)
  189. expected = np.array([4, 2, 3, 6, 7], dtype=np.intp)
  190. tm.assert_numpy_array_equal(result, expected)
  191. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  192. def test_is_range_indexer(self, dtype):
  193. # GH#50592
  194. left = np.arange(0, 100, dtype=dtype)
  195. assert lib.is_range_indexer(left, 100)
  196. @pytest.mark.skipif(
  197. not IS64,
  198. reason="2**31 is too big for Py_ssize_t on 32-bit. "
  199. "It doesn't matter though since you cannot create an array that long on 32-bit",
  200. )
  201. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  202. def test_is_range_indexer_big_n(self, dtype):
  203. # GH53616
  204. left = np.arange(0, 100, dtype=dtype)
  205. assert not lib.is_range_indexer(left, 2**31)
  206. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  207. def test_is_range_indexer_not_equal(self, dtype):
  208. # GH#50592
  209. left = np.array([1, 2], dtype=dtype)
  210. assert not lib.is_range_indexer(left, 2)
  211. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  212. def test_is_range_indexer_not_equal_shape(self, dtype):
  213. # GH#50592
  214. left = np.array([0, 1, 2], dtype=dtype)
  215. assert not lib.is_range_indexer(left, 2)
  216. def test_cache_readonly_preserve_docstrings():
  217. # GH18197
  218. assert Index.hasnans.__doc__ is not None
  219. def test_no_default_pickle():
  220. # GH#40397
  221. obj = tm.round_trip_pickle(lib.no_default)
  222. assert obj is lib.no_default
  223. def test_ensure_string_array_copy():
  224. # ensure the original array is not modified in case of copy=False with
  225. # pickle-roundtripped object dtype array
  226. # https://github.com/pandas-dev/pandas/issues/54654
  227. arr = np.array(["a", None], dtype=object)
  228. arr = pickle.loads(pickle.dumps(arr))
  229. result = lib.ensure_string_array(arr, copy=False)
  230. assert not np.shares_memory(arr, result)
  231. assert arr[1] is None
  232. assert result[1] is np.nan