test_internals.py 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422
  1. from datetime import (
  2. date,
  3. datetime,
  4. )
  5. import itertools
  6. import re
  7. import numpy as np
  8. import pytest
  9. from pandas._libs.internals import BlockPlacement
  10. from pandas.compat import IS64
  11. import pandas.util._test_decorators as td
  12. from pandas.core.dtypes.common import is_scalar
  13. import pandas as pd
  14. from pandas import (
  15. Categorical,
  16. DataFrame,
  17. DatetimeIndex,
  18. Index,
  19. IntervalIndex,
  20. Series,
  21. Timedelta,
  22. Timestamp,
  23. period_range,
  24. )
  25. import pandas._testing as tm
  26. import pandas.core.algorithms as algos
  27. from pandas.core.arrays import (
  28. DatetimeArray,
  29. SparseArray,
  30. TimedeltaArray,
  31. )
  32. from pandas.core.internals import (
  33. BlockManager,
  34. SingleBlockManager,
  35. make_block,
  36. )
  37. from pandas.core.internals.blocks import (
  38. ensure_block_shape,
  39. maybe_coerce_values,
  40. new_block,
  41. )
  42. # this file contains BlockManager specific tests
  43. # TODO(ArrayManager) factor out interleave_dtype tests
  44. pytestmark = td.skip_array_manager_invalid_test
  45. @pytest.fixture(params=[new_block, make_block])
  46. def block_maker(request):
  47. """
  48. Fixture to test both the internal new_block and pseudo-public make_block.
  49. """
  50. return request.param
  51. @pytest.fixture
  52. def mgr():
  53. return create_mgr(
  54. "a: f8; b: object; c: f8; d: object; e: f8;"
  55. "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;"
  56. "k: M8[ns, US/Eastern]; l: M8[ns, CET];"
  57. )
  58. def assert_block_equal(left, right):
  59. tm.assert_numpy_array_equal(left.values, right.values)
  60. assert left.dtype == right.dtype
  61. assert isinstance(left.mgr_locs, BlockPlacement)
  62. assert isinstance(right.mgr_locs, BlockPlacement)
  63. tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array)
  64. def get_numeric_mat(shape):
  65. arr = np.arange(shape[0])
  66. return np.lib.stride_tricks.as_strided(
  67. x=arr, shape=shape, strides=(arr.itemsize,) + (0,) * (len(shape) - 1)
  68. ).copy()
  69. N = 10
  70. def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block):
  71. """
  72. Supported typestr:
  73. * float, f8, f4, f2
  74. * int, i8, i4, i2, i1
  75. * uint, u8, u4, u2, u1
  76. * complex, c16, c8
  77. * bool
  78. * object, string, O
  79. * datetime, dt, M8[ns], M8[ns, tz]
  80. * timedelta, td, m8[ns]
  81. * sparse (SparseArray with fill_value=0.0)
  82. * sparse_na (SparseArray with fill_value=np.nan)
  83. * category, category2
  84. """
  85. placement = BlockPlacement(placement)
  86. num_items = len(placement)
  87. if item_shape is None:
  88. item_shape = (N,)
  89. shape = (num_items,) + item_shape
  90. mat = get_numeric_mat(shape)
  91. if typestr in (
  92. "float",
  93. "f8",
  94. "f4",
  95. "f2",
  96. "int",
  97. "i8",
  98. "i4",
  99. "i2",
  100. "i1",
  101. "uint",
  102. "u8",
  103. "u4",
  104. "u2",
  105. "u1",
  106. ):
  107. values = mat.astype(typestr) + num_offset
  108. elif typestr in ("complex", "c16", "c8"):
  109. values = 1.0j * (mat.astype(typestr) + num_offset)
  110. elif typestr in ("object", "string", "O"):
  111. values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape)
  112. elif typestr in ("b", "bool"):
  113. values = np.ones(shape, dtype=np.bool_)
  114. elif typestr in ("datetime", "dt", "M8[ns]"):
  115. values = (mat * 1e9).astype("M8[ns]")
  116. elif typestr.startswith("M8[ns"):
  117. # datetime with tz
  118. m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
  119. assert m is not None, f"incompatible typestr -> {typestr}"
  120. tz = m.groups()[0]
  121. assert num_items == 1, "must have only 1 num items for a tz-aware"
  122. values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data
  123. values = ensure_block_shape(values, ndim=len(shape))
  124. elif typestr in ("timedelta", "td", "m8[ns]"):
  125. values = (mat * 1).astype("m8[ns]")
  126. elif typestr in ("category",):
  127. values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
  128. elif typestr in ("category2",):
  129. values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
  130. elif typestr in ("sparse", "sparse_na"):
  131. if shape[-1] != 10:
  132. # We also are implicitly assuming this in the category cases above
  133. raise NotImplementedError
  134. assert all(s == 1 for s in shape[:-1])
  135. if typestr.endswith("_na"):
  136. fill_value = np.nan
  137. else:
  138. fill_value = 0.0
  139. values = SparseArray(
  140. [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
  141. fill_value=fill_value,
  142. )
  143. arr = values.sp_values.view()
  144. arr += num_offset - 1
  145. else:
  146. raise ValueError(f'Unsupported typestr: "{typestr}"')
  147. values = maybe_coerce_values(values)
  148. return maker(values, placement=placement, ndim=len(shape))
  149. def create_single_mgr(typestr, num_rows=None):
  150. if num_rows is None:
  151. num_rows = N
  152. return SingleBlockManager(
  153. create_block(typestr, placement=slice(0, num_rows), item_shape=()),
  154. Index(np.arange(num_rows)),
  155. )
  156. def create_mgr(descr, item_shape=None):
  157. """
  158. Construct BlockManager from string description.
  159. String description syntax looks similar to np.matrix initializer. It looks
  160. like this::
  161. a,b,c: f8; d,e,f: i8
  162. Rules are rather simple:
  163. * see list of supported datatypes in `create_block` method
  164. * components are semicolon-separated
  165. * each component is `NAME,NAME,NAME: DTYPE_ID`
  166. * whitespace around colons & semicolons are removed
  167. * components with same DTYPE_ID are combined into single block
  168. * to force multiple blocks with same dtype, use '-SUFFIX'::
  169. 'a:f8-1; b:f8-2; c:f8-foobar'
  170. """
  171. if item_shape is None:
  172. item_shape = (N,)
  173. offset = 0
  174. mgr_items = []
  175. block_placements = {}
  176. for d in descr.split(";"):
  177. d = d.strip()
  178. if not len(d):
  179. continue
  180. names, blockstr = d.partition(":")[::2]
  181. blockstr = blockstr.strip()
  182. names = names.strip().split(",")
  183. mgr_items.extend(names)
  184. placement = list(np.arange(len(names)) + offset)
  185. try:
  186. block_placements[blockstr].extend(placement)
  187. except KeyError:
  188. block_placements[blockstr] = placement
  189. offset += len(names)
  190. mgr_items = Index(mgr_items)
  191. blocks = []
  192. num_offset = 0
  193. for blockstr, placement in block_placements.items():
  194. typestr = blockstr.split("-")[0]
  195. blocks.append(
  196. create_block(
  197. typestr, placement, item_shape=item_shape, num_offset=num_offset
  198. )
  199. )
  200. num_offset += len(placement)
  201. sblocks = sorted(blocks, key=lambda b: b.mgr_locs[0])
  202. return BlockManager(
  203. tuple(sblocks),
  204. [mgr_items] + [Index(np.arange(n)) for n in item_shape],
  205. )
  206. @pytest.fixture
  207. def fblock():
  208. return create_block("float", [0, 2, 4])
  209. class TestBlock:
  210. def test_constructor(self):
  211. int32block = create_block("i4", [0])
  212. assert int32block.dtype == np.int32
  213. @pytest.mark.parametrize(
  214. "typ, data",
  215. [
  216. ["float", [0, 2, 4]],
  217. ["complex", [7]],
  218. ["object", [1, 3]],
  219. ["bool", [5]],
  220. ],
  221. )
  222. def test_pickle(self, typ, data):
  223. blk = create_block(typ, data)
  224. assert_block_equal(tm.round_trip_pickle(blk), blk)
  225. def test_mgr_locs(self, fblock):
  226. assert isinstance(fblock.mgr_locs, BlockPlacement)
  227. tm.assert_numpy_array_equal(
  228. fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.intp)
  229. )
  230. def test_attrs(self, fblock):
  231. assert fblock.shape == fblock.values.shape
  232. assert fblock.dtype == fblock.values.dtype
  233. assert len(fblock) == len(fblock.values)
  234. def test_copy(self, fblock):
  235. cop = fblock.copy()
  236. assert cop is not fblock
  237. assert_block_equal(fblock, cop)
  238. def test_delete(self, fblock):
  239. newb = fblock.copy()
  240. locs = newb.mgr_locs
  241. nb = newb.delete(0)[0]
  242. assert newb.mgr_locs is locs
  243. assert nb is not newb
  244. tm.assert_numpy_array_equal(
  245. nb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp)
  246. )
  247. assert not (newb.values[0] == 1).all()
  248. assert (nb.values[0] == 1).all()
  249. newb = fblock.copy()
  250. locs = newb.mgr_locs
  251. nb = newb.delete(1)
  252. assert len(nb) == 2
  253. assert newb.mgr_locs is locs
  254. tm.assert_numpy_array_equal(
  255. nb[0].mgr_locs.as_array, np.array([0], dtype=np.intp)
  256. )
  257. tm.assert_numpy_array_equal(
  258. nb[1].mgr_locs.as_array, np.array([4], dtype=np.intp)
  259. )
  260. assert not (newb.values[1] == 2).all()
  261. assert (nb[1].values[0] == 2).all()
  262. newb = fblock.copy()
  263. nb = newb.delete(2)
  264. assert len(nb) == 1
  265. tm.assert_numpy_array_equal(
  266. nb[0].mgr_locs.as_array, np.array([0, 2], dtype=np.intp)
  267. )
  268. assert (nb[0].values[1] == 1).all()
  269. newb = fblock.copy()
  270. with pytest.raises(IndexError, match=None):
  271. newb.delete(3)
  272. def test_delete_datetimelike(self):
  273. # dont use np.delete on values, as that will coerce from DTA/TDA to ndarray
  274. arr = np.arange(20, dtype="i8").reshape(5, 4).view("m8[ns]")
  275. df = DataFrame(arr)
  276. blk = df._mgr.blocks[0]
  277. assert isinstance(blk.values, TimedeltaArray)
  278. nb = blk.delete(1)
  279. assert len(nb) == 2
  280. assert isinstance(nb[0].values, TimedeltaArray)
  281. assert isinstance(nb[1].values, TimedeltaArray)
  282. df = DataFrame(arr.view("M8[ns]"))
  283. blk = df._mgr.blocks[0]
  284. assert isinstance(blk.values, DatetimeArray)
  285. nb = blk.delete([1, 3])
  286. assert len(nb) == 2
  287. assert isinstance(nb[0].values, DatetimeArray)
  288. assert isinstance(nb[1].values, DatetimeArray)
  289. def test_split(self):
  290. # GH#37799
  291. values = np.random.default_rng(2).standard_normal((3, 4))
  292. blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2)
  293. result = blk._split()
  294. # check that we get views, not copies
  295. values[:] = -9999
  296. assert (blk.values == -9999).all()
  297. assert len(result) == 3
  298. expected = [
  299. new_block(values[[0]], placement=BlockPlacement([3]), ndim=2),
  300. new_block(values[[1]], placement=BlockPlacement([1]), ndim=2),
  301. new_block(values[[2]], placement=BlockPlacement([6]), ndim=2),
  302. ]
  303. for res, exp in zip(result, expected):
  304. assert_block_equal(res, exp)
  305. class TestBlockManager:
  306. def test_attrs(self):
  307. mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
  308. assert mgr.nblocks == 2
  309. assert len(mgr) == 6
  310. def test_duplicate_ref_loc_failure(self):
  311. tmp_mgr = create_mgr("a:bool; a: f8")
  312. axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
  313. blocks[0].mgr_locs = BlockPlacement(np.array([0]))
  314. blocks[1].mgr_locs = BlockPlacement(np.array([0]))
  315. # test trying to create block manager with overlapping ref locs
  316. msg = "Gaps in blk ref_locs"
  317. with pytest.raises(AssertionError, match=msg):
  318. mgr = BlockManager(blocks, axes)
  319. mgr._rebuild_blknos_and_blklocs()
  320. blocks[0].mgr_locs = BlockPlacement(np.array([0]))
  321. blocks[1].mgr_locs = BlockPlacement(np.array([1]))
  322. mgr = BlockManager(blocks, axes)
  323. mgr.iget(1)
  324. def test_pickle(self, mgr):
  325. mgr2 = tm.round_trip_pickle(mgr)
  326. tm.assert_frame_equal(
  327. DataFrame._from_mgr(mgr, axes=mgr.axes),
  328. DataFrame._from_mgr(mgr2, axes=mgr2.axes),
  329. )
  330. # GH2431
  331. assert hasattr(mgr2, "_is_consolidated")
  332. assert hasattr(mgr2, "_known_consolidated")
  333. # reset to False on load
  334. assert not mgr2._is_consolidated
  335. assert not mgr2._known_consolidated
  336. @pytest.mark.parametrize("mgr_string", ["a,a,a:f8", "a: f8; a: i8"])
  337. def test_non_unique_pickle(self, mgr_string):
  338. mgr = create_mgr(mgr_string)
  339. mgr2 = tm.round_trip_pickle(mgr)
  340. tm.assert_frame_equal(
  341. DataFrame._from_mgr(mgr, axes=mgr.axes),
  342. DataFrame._from_mgr(mgr2, axes=mgr2.axes),
  343. )
  344. def test_categorical_block_pickle(self):
  345. mgr = create_mgr("a: category")
  346. mgr2 = tm.round_trip_pickle(mgr)
  347. tm.assert_frame_equal(
  348. DataFrame._from_mgr(mgr, axes=mgr.axes),
  349. DataFrame._from_mgr(mgr2, axes=mgr2.axes),
  350. )
  351. smgr = create_single_mgr("category")
  352. smgr2 = tm.round_trip_pickle(smgr)
  353. tm.assert_series_equal(
  354. Series()._constructor_from_mgr(smgr, axes=smgr.axes),
  355. Series()._constructor_from_mgr(smgr2, axes=smgr2.axes),
  356. )
  357. def test_iget(self):
  358. cols = Index(list("abc"))
  359. values = np.random.default_rng(2).random((3, 3))
  360. block = new_block(
  361. values=values.copy(),
  362. placement=BlockPlacement(np.arange(3, dtype=np.intp)),
  363. ndim=values.ndim,
  364. )
  365. mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))])
  366. tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
  367. tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
  368. tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])
  369. def test_set(self):
  370. mgr = create_mgr("a,b,c: int", item_shape=(3,))
  371. mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3))
  372. mgr.iset(1, np.array(["bar"] * 3))
  373. tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
  374. tm.assert_numpy_array_equal(
  375. mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
  376. )
  377. tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
  378. tm.assert_numpy_array_equal(
  379. mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
  380. )
  381. def test_set_change_dtype(self, mgr):
  382. mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool))
  383. mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N))
  384. idx = mgr.items.get_loc("baz")
  385. assert mgr.iget(idx).dtype == np.object_
  386. mgr2 = mgr.consolidate()
  387. mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N))
  388. idx = mgr2.items.get_loc("baz")
  389. assert mgr2.iget(idx).dtype == np.object_
  390. mgr2.insert(
  391. len(mgr2.items),
  392. "quux",
  393. np.random.default_rng(2).standard_normal(N).astype(int),
  394. )
  395. idx = mgr2.items.get_loc("quux")
  396. assert mgr2.iget(idx).dtype == np.dtype(int)
  397. mgr2.iset(
  398. mgr2.items.get_loc("quux"), np.random.default_rng(2).standard_normal(N)
  399. )
  400. assert mgr2.iget(idx).dtype == np.float64
  401. def test_copy(self, mgr):
  402. cp = mgr.copy(deep=False)
  403. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  404. # view assertion
  405. tm.assert_equal(cp_blk.values, blk.values)
  406. if isinstance(blk.values, np.ndarray):
  407. assert cp_blk.values.base is blk.values.base
  408. else:
  409. # DatetimeTZBlock has DatetimeIndex values
  410. assert cp_blk.values._ndarray.base is blk.values._ndarray.base
  411. # copy(deep=True) consolidates, so the block-wise assertions will
  412. # fail is mgr is not consolidated
  413. mgr._consolidate_inplace()
  414. cp = mgr.copy(deep=True)
  415. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  416. bvals = blk.values
  417. cpvals = cp_blk.values
  418. tm.assert_equal(cpvals, bvals)
  419. if isinstance(cpvals, np.ndarray):
  420. lbase = cpvals.base
  421. rbase = bvals.base
  422. else:
  423. lbase = cpvals._ndarray.base
  424. rbase = bvals._ndarray.base
  425. # copy assertion we either have a None for a base or in case of
  426. # some blocks it is an array (e.g. datetimetz), but was copied
  427. if isinstance(cpvals, DatetimeArray):
  428. assert (lbase is None and rbase is None) or (lbase is not rbase)
  429. elif not isinstance(cpvals, np.ndarray):
  430. assert lbase is not rbase
  431. else:
  432. assert lbase is None and rbase is None
  433. def test_sparse(self):
  434. mgr = create_mgr("a: sparse-1; b: sparse-2")
  435. assert mgr.as_array().dtype == np.float64
  436. def test_sparse_mixed(self):
  437. mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8")
  438. assert len(mgr.blocks) == 3
  439. assert isinstance(mgr, BlockManager)
  440. @pytest.mark.parametrize(
  441. "mgr_string, dtype",
  442. [("c: f4; d: f2", np.float32), ("c: f4; d: f2; e: f8", np.float64)],
  443. )
  444. def test_as_array_float(self, mgr_string, dtype):
  445. mgr = create_mgr(mgr_string)
  446. assert mgr.as_array().dtype == dtype
  447. @pytest.mark.parametrize(
  448. "mgr_string, dtype",
  449. [
  450. ("a: bool-1; b: bool-2", np.bool_),
  451. ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64),
  452. ("c: i4; d: i2; e: u1", np.int32),
  453. ],
  454. )
  455. def test_as_array_int_bool(self, mgr_string, dtype):
  456. mgr = create_mgr(mgr_string)
  457. assert mgr.as_array().dtype == dtype
  458. def test_as_array_datetime(self):
  459. mgr = create_mgr("h: datetime-1; g: datetime-2")
  460. assert mgr.as_array().dtype == "M8[ns]"
  461. def test_as_array_datetime_tz(self):
  462. mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
  463. assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
  464. assert mgr.iget(1).dtype == "datetime64[ns, CET]"
  465. assert mgr.as_array().dtype == "object"
  466. @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
  467. def test_astype(self, t):
  468. # coerce all
  469. mgr = create_mgr("c: f4; d: f2; e: f8")
  470. t = np.dtype(t)
  471. tmgr = mgr.astype(t)
  472. assert tmgr.iget(0).dtype.type == t
  473. assert tmgr.iget(1).dtype.type == t
  474. assert tmgr.iget(2).dtype.type == t
  475. # mixed
  476. mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
  477. t = np.dtype(t)
  478. tmgr = mgr.astype(t, errors="ignore")
  479. assert tmgr.iget(2).dtype.type == t
  480. assert tmgr.iget(4).dtype.type == t
  481. assert tmgr.iget(5).dtype.type == t
  482. assert tmgr.iget(6).dtype.type == t
  483. assert tmgr.iget(0).dtype.type == np.object_
  484. assert tmgr.iget(1).dtype.type == np.object_
  485. if t != np.int64:
  486. assert tmgr.iget(3).dtype.type == np.datetime64
  487. else:
  488. assert tmgr.iget(3).dtype.type == t
  489. def test_convert(self, using_infer_string):
  490. def _compare(old_mgr, new_mgr):
  491. """compare the blocks, numeric compare ==, object don't"""
  492. old_blocks = set(old_mgr.blocks)
  493. new_blocks = set(new_mgr.blocks)
  494. assert len(old_blocks) == len(new_blocks)
  495. # compare non-numeric
  496. for b in old_blocks:
  497. found = False
  498. for nb in new_blocks:
  499. if (b.values == nb.values).all():
  500. found = True
  501. break
  502. assert found
  503. for b in new_blocks:
  504. found = False
  505. for ob in old_blocks:
  506. if (b.values == ob.values).all():
  507. found = True
  508. break
  509. assert found
  510. # noops
  511. mgr = create_mgr("f: i8; g: f8")
  512. new_mgr = mgr.convert(copy=True)
  513. _compare(mgr, new_mgr)
  514. # convert
  515. mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
  516. mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
  517. mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
  518. mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
  519. new_mgr = mgr.convert(copy=True)
  520. dtype = "str" if using_infer_string else np.object_
  521. assert new_mgr.iget(0).dtype == dtype
  522. assert new_mgr.iget(1).dtype == dtype
  523. assert new_mgr.iget(2).dtype == dtype
  524. assert new_mgr.iget(3).dtype == np.int64
  525. assert new_mgr.iget(4).dtype == np.float64
  526. mgr = create_mgr(
  527. "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
  528. )
  529. mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
  530. mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
  531. mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
  532. new_mgr = mgr.convert(copy=True)
  533. assert new_mgr.iget(0).dtype == dtype
  534. assert new_mgr.iget(1).dtype == dtype
  535. assert new_mgr.iget(2).dtype == dtype
  536. assert new_mgr.iget(3).dtype == np.int32
  537. assert new_mgr.iget(4).dtype == np.bool_
  538. assert new_mgr.iget(5).dtype.type, np.datetime64
  539. assert new_mgr.iget(6).dtype == np.int64
  540. assert new_mgr.iget(7).dtype == np.float64
  541. assert new_mgr.iget(8).dtype == np.float16
  542. def test_interleave(self):
  543. # self
  544. for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]:
  545. mgr = create_mgr(f"a: {dtype}")
  546. assert mgr.as_array().dtype == dtype
  547. mgr = create_mgr(f"a: {dtype}; b: {dtype}")
  548. assert mgr.as_array().dtype == dtype
  549. @pytest.mark.parametrize(
  550. "mgr_string, dtype",
  551. [
  552. ("a: category", "i8"),
  553. ("a: category; b: category", "i8"),
  554. ("a: category; b: category2", "object"),
  555. ("a: category2", "object"),
  556. ("a: category2; b: category2", "object"),
  557. ("a: f8", "f8"),
  558. ("a: f8; b: i8", "f8"),
  559. ("a: f4; b: i8", "f8"),
  560. ("a: f4; b: i8; d: object", "object"),
  561. ("a: bool; b: i8", "object"),
  562. ("a: complex", "complex"),
  563. ("a: f8; b: category", "object"),
  564. ("a: M8[ns]; b: category", "object"),
  565. ("a: M8[ns]; b: bool", "object"),
  566. ("a: M8[ns]; b: i8", "object"),
  567. ("a: m8[ns]; b: bool", "object"),
  568. ("a: m8[ns]; b: i8", "object"),
  569. ("a: M8[ns]; b: m8[ns]", "object"),
  570. ],
  571. )
  572. def test_interleave_dtype(self, mgr_string, dtype):
  573. # will be converted according the actual dtype of the underlying
  574. mgr = create_mgr("a: category")
  575. assert mgr.as_array().dtype == "i8"
  576. mgr = create_mgr("a: category; b: category2")
  577. assert mgr.as_array().dtype == "object"
  578. mgr = create_mgr("a: category2")
  579. assert mgr.as_array().dtype == "object"
  580. # combinations
  581. mgr = create_mgr("a: f8")
  582. assert mgr.as_array().dtype == "f8"
  583. mgr = create_mgr("a: f8; b: i8")
  584. assert mgr.as_array().dtype == "f8"
  585. mgr = create_mgr("a: f4; b: i8")
  586. assert mgr.as_array().dtype == "f8"
  587. mgr = create_mgr("a: f4; b: i8; d: object")
  588. assert mgr.as_array().dtype == "object"
  589. mgr = create_mgr("a: bool; b: i8")
  590. assert mgr.as_array().dtype == "object"
  591. mgr = create_mgr("a: complex")
  592. assert mgr.as_array().dtype == "complex"
  593. mgr = create_mgr("a: f8; b: category")
  594. assert mgr.as_array().dtype == "f8"
  595. mgr = create_mgr("a: M8[ns]; b: category")
  596. assert mgr.as_array().dtype == "object"
  597. mgr = create_mgr("a: M8[ns]; b: bool")
  598. assert mgr.as_array().dtype == "object"
  599. mgr = create_mgr("a: M8[ns]; b: i8")
  600. assert mgr.as_array().dtype == "object"
  601. mgr = create_mgr("a: m8[ns]; b: bool")
  602. assert mgr.as_array().dtype == "object"
  603. mgr = create_mgr("a: m8[ns]; b: i8")
  604. assert mgr.as_array().dtype == "object"
  605. mgr = create_mgr("a: M8[ns]; b: m8[ns]")
  606. assert mgr.as_array().dtype == "object"
  607. def test_consolidate_ordering_issues(self, mgr):
  608. mgr.iset(mgr.items.get_loc("f"), np.random.default_rng(2).standard_normal(N))
  609. mgr.iset(mgr.items.get_loc("d"), np.random.default_rng(2).standard_normal(N))
  610. mgr.iset(mgr.items.get_loc("b"), np.random.default_rng(2).standard_normal(N))
  611. mgr.iset(mgr.items.get_loc("g"), np.random.default_rng(2).standard_normal(N))
  612. mgr.iset(mgr.items.get_loc("h"), np.random.default_rng(2).standard_normal(N))
  613. # we have datetime/tz blocks in mgr
  614. cons = mgr.consolidate()
  615. assert cons.nblocks == 4
  616. cons = mgr.consolidate().get_numeric_data()
  617. assert cons.nblocks == 1
  618. assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement)
  619. tm.assert_numpy_array_equal(
  620. cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp)
  621. )
  622. def test_reindex_items(self):
  623. # mgr is not consolidated, f8 & f8-2 blocks
  624. mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")
  625. reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
  626. # reindex_axis does not consolidate_inplace, as that risks failing to
  627. # invalidate _item_cache
  628. assert not reindexed.is_consolidated()
  629. tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))
  630. tm.assert_almost_equal(
  631. mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
  632. )
  633. tm.assert_almost_equal(
  634. mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
  635. )
  636. tm.assert_almost_equal(
  637. mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
  638. )
  639. tm.assert_almost_equal(
  640. mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
  641. )
  642. def test_get_numeric_data(self, using_copy_on_write):
  643. mgr = create_mgr(
  644. "int: int; float: float; complex: complex;"
  645. "str: object; bool: bool; obj: object; dt: datetime",
  646. item_shape=(3,),
  647. )
  648. mgr.iset(5, np.array([1, 2, 3], dtype=np.object_))
  649. numeric = mgr.get_numeric_data()
  650. tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
  651. tm.assert_almost_equal(
  652. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  653. numeric.iget(numeric.items.get_loc("float")).internal_values(),
  654. )
  655. # Check sharing
  656. numeric.iset(
  657. numeric.items.get_loc("float"),
  658. np.array([100.0, 200.0, 300.0]),
  659. inplace=True,
  660. )
  661. if using_copy_on_write:
  662. tm.assert_almost_equal(
  663. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  664. np.array([1.0, 1.0, 1.0]),
  665. )
  666. else:
  667. tm.assert_almost_equal(
  668. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  669. np.array([100.0, 200.0, 300.0]),
  670. )
  671. def test_get_bool_data(self, using_copy_on_write):
  672. mgr = create_mgr(
  673. "int: int; float: float; complex: complex;"
  674. "str: object; bool: bool; obj: object; dt: datetime",
  675. item_shape=(3,),
  676. )
  677. mgr.iset(6, np.array([True, False, True], dtype=np.object_))
  678. bools = mgr.get_bool_data()
  679. tm.assert_index_equal(bools.items, Index(["bool"]))
  680. tm.assert_almost_equal(
  681. mgr.iget(mgr.items.get_loc("bool")).internal_values(),
  682. bools.iget(bools.items.get_loc("bool")).internal_values(),
  683. )
  684. bools.iset(0, np.array([True, False, True]), inplace=True)
  685. if using_copy_on_write:
  686. tm.assert_numpy_array_equal(
  687. mgr.iget(mgr.items.get_loc("bool")).internal_values(),
  688. np.array([True, True, True]),
  689. )
  690. else:
  691. tm.assert_numpy_array_equal(
  692. mgr.iget(mgr.items.get_loc("bool")).internal_values(),
  693. np.array([True, False, True]),
  694. )
  695. def test_unicode_repr_doesnt_raise(self):
  696. repr(create_mgr("b,\u05d0: object"))
  697. @pytest.mark.parametrize(
  698. "mgr_string", ["a,b,c: i8-1; d,e,f: i8-2", "a,a,a: i8-1; b,b,b: i8-2"]
  699. )
  700. def test_equals(self, mgr_string):
  701. # unique items
  702. bm1 = create_mgr(mgr_string)
  703. bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
  704. assert bm1.equals(bm2)
  705. @pytest.mark.parametrize(
  706. "mgr_string",
  707. [
  708. "a:i8;b:f8", # basic case
  709. "a:i8;b:f8;c:c8;d:b", # many types
  710. "a:i8;e:dt;f:td;g:string", # more types
  711. "a:i8;b:category;c:category2", # categories
  712. "c:sparse;d:sparse_na;b:f8", # sparse
  713. ],
  714. )
  715. def test_equals_block_order_different_dtypes(self, mgr_string):
  716. # GH 9330
  717. bm = create_mgr(mgr_string)
  718. block_perms = itertools.permutations(bm.blocks)
  719. for bm_perm in block_perms:
  720. bm_this = BlockManager(tuple(bm_perm), bm.axes)
  721. assert bm.equals(bm_this)
  722. assert bm_this.equals(bm)
  723. def test_single_mgr_ctor(self):
  724. mgr = create_single_mgr("f8", num_rows=5)
  725. assert mgr.external_values().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0]
  726. @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
  727. def test_validate_bool_args(self, value):
  728. bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2")
  729. msg = (
  730. 'For argument "inplace" expected type bool, '
  731. f"received type {type(value).__name__}."
  732. )
  733. with pytest.raises(ValueError, match=msg):
  734. bm1.replace_list([1], [2], inplace=value)
  735. def test_iset_split_block(self):
  736. bm = create_mgr("a,b,c: i8; d: f8")
  737. bm._iset_split_block(0, np.array([0]))
  738. tm.assert_numpy_array_equal(
  739. bm.blklocs, np.array([0, 0, 1, 0], dtype="int64" if IS64 else "int32")
  740. )
  741. # First indexer currently does not have a block associated with it in case
  742. tm.assert_numpy_array_equal(
  743. bm.blknos, np.array([0, 0, 0, 1], dtype="int64" if IS64 else "int32")
  744. )
  745. assert len(bm.blocks) == 2
  746. def test_iset_split_block_values(self):
  747. bm = create_mgr("a,b,c: i8; d: f8")
  748. bm._iset_split_block(0, np.array([0]), np.array([list(range(10))]))
  749. tm.assert_numpy_array_equal(
  750. bm.blklocs, np.array([0, 0, 1, 0], dtype="int64" if IS64 else "int32")
  751. )
  752. # First indexer currently does not have a block associated with it in case
  753. tm.assert_numpy_array_equal(
  754. bm.blknos, np.array([0, 2, 2, 1], dtype="int64" if IS64 else "int32")
  755. )
  756. assert len(bm.blocks) == 3
  757. def _as_array(mgr):
  758. if mgr.ndim == 1:
  759. return mgr.external_values()
  760. return mgr.as_array().T
  761. class TestIndexing:
  762. # Nosetests-style data-driven tests.
  763. #
  764. # This test applies different indexing routines to block managers and
  765. # compares the outcome to the result of same operations on np.ndarray.
  766. #
  767. # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests
  768. # and are disabled.
  769. MANAGERS = [
  770. create_single_mgr("f8", N),
  771. create_single_mgr("i8", N),
  772. # 2-dim
  773. create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)),
  774. create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)),
  775. create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)),
  776. create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)),
  777. ]
  778. @pytest.mark.parametrize("mgr", MANAGERS)
  779. def test_get_slice(self, mgr):
  780. def assert_slice_ok(mgr, axis, slobj):
  781. mat = _as_array(mgr)
  782. # we maybe using an ndarray to test slicing and
  783. # might not be the full length of the axis
  784. if isinstance(slobj, np.ndarray):
  785. ax = mgr.axes[axis]
  786. if len(ax) and len(slobj) and len(slobj) != len(ax):
  787. slobj = np.concatenate(
  788. [slobj, np.zeros(len(ax) - len(slobj), dtype=bool)]
  789. )
  790. if isinstance(slobj, slice):
  791. sliced = mgr.get_slice(slobj, axis=axis)
  792. elif (
  793. mgr.ndim == 1
  794. and axis == 0
  795. and isinstance(slobj, np.ndarray)
  796. and slobj.dtype == bool
  797. ):
  798. sliced = mgr.get_rows_with_mask(slobj)
  799. else:
  800. # BlockManager doesn't support non-slice, SingleBlockManager
  801. # doesn't support axis > 0
  802. raise TypeError(slobj)
  803. mat_slobj = (slice(None),) * axis + (slobj,)
  804. tm.assert_numpy_array_equal(
  805. mat[mat_slobj], _as_array(sliced), check_dtype=False
  806. )
  807. tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis])
  808. assert mgr.ndim <= 2, mgr.ndim
  809. for ax in range(mgr.ndim):
  810. # slice
  811. assert_slice_ok(mgr, ax, slice(None))
  812. assert_slice_ok(mgr, ax, slice(3))
  813. assert_slice_ok(mgr, ax, slice(100))
  814. assert_slice_ok(mgr, ax, slice(1, 4))
  815. assert_slice_ok(mgr, ax, slice(3, 0, -2))
  816. if mgr.ndim < 2:
  817. # 2D only support slice objects
  818. # boolean mask
  819. assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_))
  820. assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_))
  821. if mgr.shape[ax] >= 3:
  822. assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0)
  823. assert_slice_ok(
  824. mgr, ax, np.array([True, True, False], dtype=np.bool_)
  825. )
  826. @pytest.mark.parametrize("mgr", MANAGERS)
  827. def test_take(self, mgr):
  828. def assert_take_ok(mgr, axis, indexer):
  829. mat = _as_array(mgr)
  830. taken = mgr.take(indexer, axis)
  831. tm.assert_numpy_array_equal(
  832. np.take(mat, indexer, axis), _as_array(taken), check_dtype=False
  833. )
  834. tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis])
  835. for ax in range(mgr.ndim):
  836. # take/fancy indexer
  837. assert_take_ok(mgr, ax, indexer=np.array([], dtype=np.intp))
  838. assert_take_ok(mgr, ax, indexer=np.array([0, 0, 0], dtype=np.intp))
  839. assert_take_ok(
  840. mgr, ax, indexer=np.array(list(range(mgr.shape[ax])), dtype=np.intp)
  841. )
  842. if mgr.shape[ax] >= 3:
  843. assert_take_ok(mgr, ax, indexer=np.array([0, 1, 2], dtype=np.intp))
  844. assert_take_ok(mgr, ax, indexer=np.array([-1, -2, -3], dtype=np.intp))
  845. @pytest.mark.parametrize("mgr", MANAGERS)
  846. @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
  847. def test_reindex_axis(self, fill_value, mgr):
  848. def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value):
  849. mat = _as_array(mgr)
  850. indexer = mgr.axes[axis].get_indexer_for(new_labels)
  851. reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value)
  852. tm.assert_numpy_array_equal(
  853. algos.take_nd(mat, indexer, axis, fill_value=fill_value),
  854. _as_array(reindexed),
  855. check_dtype=False,
  856. )
  857. tm.assert_index_equal(reindexed.axes[axis], new_labels)
  858. for ax in range(mgr.ndim):
  859. assert_reindex_axis_is_ok(mgr, ax, Index([]), fill_value)
  860. assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value)
  861. assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value)
  862. assert_reindex_axis_is_ok(mgr, ax, Index(["foo", "bar", "baz"]), fill_value)
  863. assert_reindex_axis_is_ok(
  864. mgr, ax, Index(["foo", mgr.axes[ax][0], "baz"]), fill_value
  865. )
  866. if mgr.shape[ax] >= 3:
  867. assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][:-3], fill_value)
  868. assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][-3::-1], fill_value)
  869. assert_reindex_axis_is_ok(
  870. mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value
  871. )
  872. @pytest.mark.parametrize("mgr", MANAGERS)
  873. @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
  874. def test_reindex_indexer(self, fill_value, mgr):
  875. def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value):
  876. mat = _as_array(mgr)
  877. reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value)
  878. reindexed = mgr.reindex_indexer(
  879. new_labels, indexer, axis, fill_value=fill_value
  880. )
  881. tm.assert_numpy_array_equal(
  882. reindexed_mat, _as_array(reindexed), check_dtype=False
  883. )
  884. tm.assert_index_equal(reindexed.axes[axis], new_labels)
  885. for ax in range(mgr.ndim):
  886. assert_reindex_indexer_is_ok(
  887. mgr, ax, Index([]), np.array([], dtype=np.intp), fill_value
  888. )
  889. assert_reindex_indexer_is_ok(
  890. mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value
  891. )
  892. assert_reindex_indexer_is_ok(
  893. mgr,
  894. ax,
  895. Index(["foo"] * mgr.shape[ax]),
  896. np.arange(mgr.shape[ax]),
  897. fill_value,
  898. )
  899. assert_reindex_indexer_is_ok(
  900. mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value
  901. )
  902. assert_reindex_indexer_is_ok(
  903. mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value
  904. )
  905. assert_reindex_indexer_is_ok(
  906. mgr, ax, Index(["foo", "bar", "baz"]), np.array([0, 0, 0]), fill_value
  907. )
  908. assert_reindex_indexer_is_ok(
  909. mgr, ax, Index(["foo", "bar", "baz"]), np.array([-1, 0, -1]), fill_value
  910. )
  911. assert_reindex_indexer_is_ok(
  912. mgr,
  913. ax,
  914. Index(["foo", mgr.axes[ax][0], "baz"]),
  915. np.array([-1, -1, -1]),
  916. fill_value,
  917. )
  918. if mgr.shape[ax] >= 3:
  919. assert_reindex_indexer_is_ok(
  920. mgr,
  921. ax,
  922. Index(["foo", "bar", "baz"]),
  923. np.array([0, 1, 2]),
  924. fill_value,
  925. )
  926. class TestBlockPlacement:
  927. @pytest.mark.parametrize(
  928. "slc, expected",
  929. [
  930. (slice(0, 4), 4),
  931. (slice(0, 4, 2), 2),
  932. (slice(0, 3, 2), 2),
  933. (slice(0, 1, 2), 1),
  934. (slice(1, 0, -1), 1),
  935. ],
  936. )
  937. def test_slice_len(self, slc, expected):
  938. assert len(BlockPlacement(slc)) == expected
  939. @pytest.mark.parametrize("slc", [slice(1, 1, 0), slice(1, 2, 0)])
  940. def test_zero_step_raises(self, slc):
  941. msg = "slice step cannot be zero"
  942. with pytest.raises(ValueError, match=msg):
  943. BlockPlacement(slc)
  944. def test_slice_canonize_negative_stop(self):
  945. # GH#37524 negative stop is OK with negative step and positive start
  946. slc = slice(3, -1, -2)
  947. bp = BlockPlacement(slc)
  948. assert bp.indexer == slice(3, None, -2)
  949. @pytest.mark.parametrize(
  950. "slc",
  951. [
  952. slice(None, None),
  953. slice(10, None),
  954. slice(None, None, -1),
  955. slice(None, 10, -1),
  956. # These are "unbounded" because negative index will
  957. # change depending on container shape.
  958. slice(-1, None),
  959. slice(None, -1),
  960. slice(-1, -1),
  961. slice(-1, None, -1),
  962. slice(None, -1, -1),
  963. slice(-1, -1, -1),
  964. ],
  965. )
  966. def test_unbounded_slice_raises(self, slc):
  967. msg = "unbounded slice"
  968. with pytest.raises(ValueError, match=msg):
  969. BlockPlacement(slc)
  970. @pytest.mark.parametrize(
  971. "slc",
  972. [
  973. slice(0, 0),
  974. slice(100, 0),
  975. slice(100, 100),
  976. slice(100, 100, -1),
  977. slice(0, 100, -1),
  978. ],
  979. )
  980. def test_not_slice_like_slices(self, slc):
  981. assert not BlockPlacement(slc).is_slice_like
  982. @pytest.mark.parametrize(
  983. "arr, slc",
  984. [
  985. ([0], slice(0, 1, 1)),
  986. ([100], slice(100, 101, 1)),
  987. ([0, 1, 2], slice(0, 3, 1)),
  988. ([0, 5, 10], slice(0, 15, 5)),
  989. ([0, 100], slice(0, 200, 100)),
  990. ([2, 1], slice(2, 0, -1)),
  991. ],
  992. )
  993. def test_array_to_slice_conversion(self, arr, slc):
  994. assert BlockPlacement(arr).as_slice == slc
  995. @pytest.mark.parametrize(
  996. "arr",
  997. [
  998. [],
  999. [-1],
  1000. [-1, -2, -3],
  1001. [-10],
  1002. [-1],
  1003. [-1, 0, 1, 2],
  1004. [-2, 0, 2, 4],
  1005. [1, 0, -1],
  1006. [1, 1, 1],
  1007. ],
  1008. )
  1009. def test_not_slice_like_arrays(self, arr):
  1010. assert not BlockPlacement(arr).is_slice_like
  1011. @pytest.mark.parametrize(
  1012. "slc, expected",
  1013. [(slice(0, 3), [0, 1, 2]), (slice(0, 0), []), (slice(3, 0), [])],
  1014. )
  1015. def test_slice_iter(self, slc, expected):
  1016. assert list(BlockPlacement(slc)) == expected
  1017. @pytest.mark.parametrize(
  1018. "slc, arr",
  1019. [
  1020. (slice(0, 3), [0, 1, 2]),
  1021. (slice(0, 0), []),
  1022. (slice(3, 0), []),
  1023. (slice(3, 0, -1), [3, 2, 1]),
  1024. ],
  1025. )
  1026. def test_slice_to_array_conversion(self, slc, arr):
  1027. tm.assert_numpy_array_equal(
  1028. BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp)
  1029. )
  1030. def test_blockplacement_add(self):
  1031. bpl = BlockPlacement(slice(0, 5))
  1032. assert bpl.add(1).as_slice == slice(1, 6, 1)
  1033. assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2)
  1034. assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5]
  1035. @pytest.mark.parametrize(
  1036. "val, inc, expected",
  1037. [
  1038. (slice(0, 0), 0, []),
  1039. (slice(1, 4), 0, [1, 2, 3]),
  1040. (slice(3, 0, -1), 0, [3, 2, 1]),
  1041. ([1, 2, 4], 0, [1, 2, 4]),
  1042. (slice(0, 0), 10, []),
  1043. (slice(1, 4), 10, [11, 12, 13]),
  1044. (slice(3, 0, -1), 10, [13, 12, 11]),
  1045. ([1, 2, 4], 10, [11, 12, 14]),
  1046. (slice(0, 0), -1, []),
  1047. (slice(1, 4), -1, [0, 1, 2]),
  1048. ([1, 2, 4], -1, [0, 1, 3]),
  1049. ],
  1050. )
  1051. def test_blockplacement_add_int(self, val, inc, expected):
  1052. assert list(BlockPlacement(val).add(inc)) == expected
  1053. @pytest.mark.parametrize("val", [slice(1, 4), [1, 2, 4]])
  1054. def test_blockplacement_add_int_raises(self, val):
  1055. msg = "iadd causes length change"
  1056. with pytest.raises(ValueError, match=msg):
  1057. BlockPlacement(val).add(-10)
  1058. class TestCanHoldElement:
  1059. @pytest.fixture(
  1060. params=[
  1061. lambda x: x,
  1062. lambda x: x.to_series(),
  1063. lambda x: x._data,
  1064. lambda x: list(x),
  1065. lambda x: x.astype(object),
  1066. lambda x: np.asarray(x),
  1067. lambda x: x[0],
  1068. lambda x: x[:0],
  1069. ]
  1070. )
  1071. def element(self, request):
  1072. """
  1073. Functions that take an Index and return an element that should have
  1074. blk._can_hold_element(element) for a Block with this index's dtype.
  1075. """
  1076. return request.param
  1077. def test_datetime_block_can_hold_element(self):
  1078. block = create_block("datetime", [0])
  1079. assert block._can_hold_element([])
  1080. # We will check that block._can_hold_element iff arr.__setitem__ works
  1081. arr = pd.array(block.values.ravel())
  1082. # coerce None
  1083. assert block._can_hold_element(None)
  1084. arr[0] = None
  1085. assert arr[0] is pd.NaT
  1086. # coerce different types of datetime objects
  1087. vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)]
  1088. for val in vals:
  1089. assert block._can_hold_element(val)
  1090. arr[0] = val
  1091. val = date(2010, 10, 10)
  1092. assert not block._can_hold_element(val)
  1093. msg = (
  1094. "value should be a 'Timestamp', 'NaT', "
  1095. "or array of those. Got 'date' instead."
  1096. )
  1097. with pytest.raises(TypeError, match=msg):
  1098. arr[0] = val
  1099. @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
  1100. def test_interval_can_hold_element_emptylist(self, dtype, element):
  1101. arr = np.array([1, 3, 4], dtype=dtype)
  1102. ii = IntervalIndex.from_breaks(arr)
  1103. blk = new_block(ii._data, BlockPlacement([1]), ndim=2)
  1104. assert blk._can_hold_element([])
  1105. # TODO: check this holds for all blocks
  1106. @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
  1107. def test_interval_can_hold_element(self, dtype, element):
  1108. arr = np.array([1, 3, 4, 9], dtype=dtype)
  1109. ii = IntervalIndex.from_breaks(arr)
  1110. blk = new_block(ii._data, BlockPlacement([1]), ndim=2)
  1111. elem = element(ii)
  1112. self.check_series_setitem(elem, ii, True)
  1113. assert blk._can_hold_element(elem)
  1114. # Careful: to get the expected Series-inplace behavior we need
  1115. # `elem` to not have the same length as `arr`
  1116. ii2 = IntervalIndex.from_breaks(arr[:-1], closed="neither")
  1117. elem = element(ii2)
  1118. with tm.assert_produces_warning(FutureWarning):
  1119. self.check_series_setitem(elem, ii, False)
  1120. assert not blk._can_hold_element(elem)
  1121. ii3 = IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)])
  1122. elem = element(ii3)
  1123. with tm.assert_produces_warning(FutureWarning):
  1124. self.check_series_setitem(elem, ii, False)
  1125. assert not blk._can_hold_element(elem)
  1126. ii4 = IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)])
  1127. elem = element(ii4)
  1128. with tm.assert_produces_warning(FutureWarning):
  1129. self.check_series_setitem(elem, ii, False)
  1130. assert not blk._can_hold_element(elem)
  1131. def test_period_can_hold_element_emptylist(self):
  1132. pi = period_range("2016", periods=3, freq="Y")
  1133. blk = new_block(pi._data.reshape(1, 3), BlockPlacement([1]), ndim=2)
  1134. assert blk._can_hold_element([])
  1135. def test_period_can_hold_element(self, element):
  1136. pi = period_range("2016", periods=3, freq="Y")
  1137. elem = element(pi)
  1138. self.check_series_setitem(elem, pi, True)
  1139. # Careful: to get the expected Series-inplace behavior we need
  1140. # `elem` to not have the same length as `arr`
  1141. pi2 = pi.asfreq("D")[:-1]
  1142. elem = element(pi2)
  1143. with tm.assert_produces_warning(FutureWarning):
  1144. self.check_series_setitem(elem, pi, False)
  1145. dti = pi.to_timestamp("s")[:-1]
  1146. elem = element(dti)
  1147. with tm.assert_produces_warning(FutureWarning):
  1148. self.check_series_setitem(elem, pi, False)
  1149. def check_can_hold_element(self, obj, elem, inplace: bool):
  1150. blk = obj._mgr.blocks[0]
  1151. if inplace:
  1152. assert blk._can_hold_element(elem)
  1153. else:
  1154. assert not blk._can_hold_element(elem)
  1155. def check_series_setitem(self, elem, index: Index, inplace: bool):
  1156. arr = index._data.copy()
  1157. ser = Series(arr, copy=False)
  1158. self.check_can_hold_element(ser, elem, inplace)
  1159. if is_scalar(elem):
  1160. ser[0] = elem
  1161. else:
  1162. ser[: len(elem)] = elem
  1163. if inplace:
  1164. assert ser.array is arr # i.e. setting was done inplace
  1165. else:
  1166. assert ser.dtype == object
  1167. class TestShouldStore:
  1168. def test_should_store_categorical(self):
  1169. cat = Categorical(["A", "B", "C"])
  1170. df = DataFrame(cat)
  1171. blk = df._mgr.blocks[0]
  1172. # matching dtype
  1173. assert blk.should_store(cat)
  1174. assert blk.should_store(cat[:-1])
  1175. # different dtype
  1176. assert not blk.should_store(cat.as_ordered())
  1177. # ndarray instead of Categorical
  1178. assert not blk.should_store(np.asarray(cat))
  1179. def test_validate_ndim():
  1180. values = np.array([1.0, 2.0])
  1181. placement = BlockPlacement(slice(2))
  1182. msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"
  1183. with pytest.raises(ValueError, match=msg):
  1184. make_block(values, placement, ndim=2)
  1185. def test_block_shape():
  1186. idx = Index([0, 1, 2, 3, 4])
  1187. a = Series([1, 2, 3]).reindex(idx)
  1188. b = Series(Categorical([1, 2, 3])).reindex(idx)
  1189. assert a._mgr.blocks[0].mgr_locs.indexer == b._mgr.blocks[0].mgr_locs.indexer
  1190. def test_make_block_no_pandas_array(block_maker):
  1191. # https://github.com/pandas-dev/pandas/pull/24866
  1192. arr = pd.arrays.NumpyExtensionArray(np.array([1, 2]))
  1193. # NumpyExtensionArray, no dtype
  1194. result = block_maker(arr, BlockPlacement(slice(len(arr))), ndim=arr.ndim)
  1195. assert result.dtype.kind in ["i", "u"]
  1196. if block_maker is make_block:
  1197. # new_block requires caller to unwrap NumpyExtensionArray
  1198. assert result.is_extension is False
  1199. # NumpyExtensionArray, NumpyEADtype
  1200. result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim)
  1201. assert result.dtype.kind in ["i", "u"]
  1202. assert result.is_extension is False
  1203. # new_block no longer taked dtype keyword
  1204. # ndarray, NumpyEADtype
  1205. result = block_maker(
  1206. arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim
  1207. )
  1208. assert result.dtype.kind in ["i", "u"]
  1209. assert result.is_extension is False