# test_shift.py
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. import pandas as pd
  5. from pandas import (
  6. CategoricalIndex,
  7. DataFrame,
  8. Index,
  9. NaT,
  10. Series,
  11. date_range,
  12. offsets,
  13. )
  14. import pandas._testing as tm
  15. class TestDataFrameShift:
  16. def test_shift_axis1_with_valid_fill_value_one_array(self):
  17. # Case with axis=1 that does not go through the "len(arrays)>1" path
  18. # in DataFrame.shift
  19. data = np.random.default_rng(2).standard_normal((5, 3))
  20. df = DataFrame(data)
  21. res = df.shift(axis=1, periods=1, fill_value=12345)
  22. expected = df.T.shift(periods=1, fill_value=12345).T
  23. tm.assert_frame_equal(res, expected)
  24. # same but with an 1D ExtensionArray backing it
  25. df2 = df[[0]].astype("Float64")
  26. res2 = df2.shift(axis=1, periods=1, fill_value=12345)
  27. expected2 = DataFrame([12345] * 5, dtype="Float64")
  28. tm.assert_frame_equal(res2, expected2)
  29. def test_shift_deprecate_freq_and_fill_value(self, frame_or_series):
  30. # Can't pass both!
  31. obj = frame_or_series(
  32. np.random.default_rng(2).standard_normal(5),
  33. index=date_range("1/1/2000", periods=5, freq="h"),
  34. )
  35. msg = (
  36. "Passing a 'freq' together with a 'fill_value' silently ignores the "
  37. "fill_value"
  38. )
  39. with tm.assert_produces_warning(FutureWarning, match=msg):
  40. obj.shift(1, fill_value=1, freq="h")
  41. if frame_or_series is DataFrame:
  42. obj.columns = date_range("1/1/2000", periods=1, freq="h")
  43. with tm.assert_produces_warning(FutureWarning, match=msg):
  44. obj.shift(1, axis=1, fill_value=1, freq="h")
  @pytest.mark.parametrize(
      "input_data, output_data",
      [(np.empty(shape=(0,)), []), (np.ones(shape=(2,)), [np.nan, 1.0])],
  )
  def test_shift_non_writable_array(self, input_data, output_data, frame_or_series):
      """GH21049: a numpy array flagged read-only can still be shifted."""
      input_data.setflags(write=False)
      shifted = frame_or_series(input_data).shift(1)

      if frame_or_series is Series:
          expected = frame_or_series(output_data, dtype="float64")
      else:
          # need to explicitly specify columns in the empty case
          expected = frame_or_series(
              output_data,
              index=range(len(output_data)),
              columns=range(1),
              dtype="float64",
          )
      tm.assert_equal(shifted, expected)
  64. def test_shift_mismatched_freq(self, frame_or_series):
  65. ts = frame_or_series(
  66. np.random.default_rng(2).standard_normal(5),
  67. index=date_range("1/1/2000", periods=5, freq="h"),
  68. )
  69. result = ts.shift(1, freq="5min")
  70. exp_index = ts.index.shift(1, freq="5min")
  71. tm.assert_index_equal(result.index, exp_index)
  72. # GH#1063, multiple of same base
  73. result = ts.shift(1, freq="4h")
  74. exp_index = ts.index + offsets.Hour(4)
  75. tm.assert_index_equal(result.index, exp_index)
  @pytest.mark.parametrize(
      "obj",
      [
          Series([np.arange(5)]),
          date_range("1/1/2011", periods=24, freq="h"),
          Series(range(5), index=date_range("2017", periods=5)),
      ],
  )
  @pytest.mark.parametrize("shift_size", [0, 1, 2])
  def test_shift_always_copy(self, obj, shift_size, frame_or_series):
      """GH#22397: ``shift`` never returns the object it was called on."""
      candidate = obj if frame_or_series is Series else obj.to_frame()
      shifted = candidate.shift(shift_size)
      assert shifted is not candidate
  def test_shift_object_non_scalar_fill(self):
      """Non-scalar fill_value raises, except when the data is object dtype."""
      # shift requires scalar fill_value except for object dtype
      int_ser = Series(range(3))
      with pytest.raises(ValueError, match="fill_value must be a scalar"):
          int_ser.shift(1, fill_value=[])

      int_df = int_ser.to_frame()
      with pytest.raises(ValueError, match="fill_value must be a scalar"):
          int_df.shift(1, fill_value=np.arange(3))

      # object dtype: a dict fill_value lands in the first position
      obj_ser = int_ser.astype(object)
      shifted_ser = obj_ser.shift(1, fill_value={})
      assert shifted_ser[0] == {}

      shifted_df = obj_ser.to_frame().shift(1, fill_value={})
      assert shifted_df.iloc[0, 0] == {}
  104. def test_shift_int(self, datetime_frame, frame_or_series):
  105. ts = tm.get_obj(datetime_frame, frame_or_series).astype(int)
  106. shifted = ts.shift(1)
  107. expected = ts.astype(float).shift(1)
  108. tm.assert_equal(shifted, expected)
  @pytest.mark.parametrize("dtype", ["int32", "int64"])
  def test_shift_32bit_take(self, frame_or_series, dtype):
      """GH#8129: ``periods`` given as a numpy integer of the parametrized dtype."""
      # 32-bit taking
      idx = date_range("2000-01-01", periods=5)
      values = np.arange(5, dtype=dtype)
      obj = frame_or_series(values, index=idx)

      # element taken from the array, so it is a numpy scalar rather than an int
      n_periods = values[1]
      shifted = obj.shift(periods=n_periods)

      expected = frame_or_series([np.nan, 0, 1, 2, 3], index=idx)
      tm.assert_equal(shifted, expected)
  @pytest.mark.parametrize("periods", [1, 2, 3, 4])
  def test_shift_preserve_freqstr(self, periods, frame_or_series):
      """GH#21275: ``shift(1, "2h")`` gives an hourly index starting two hours later."""

      def hourly_from(start):
          return date_range(start, periods=periods, freq="h")

      obj = frame_or_series(range(periods), index=hourly_from("2016-1-1 00:00:00"))
      shifted = obj.shift(1, "2h")

      expected = frame_or_series(
          range(periods), index=hourly_from("2016-1-1 02:00:00")
      )
      tm.assert_equal(shifted, expected)
  133. def test_shift_dst(self, frame_or_series):
  134. # GH#13926
  135. dates = date_range("2016-11-06", freq="h", periods=10, tz="US/Eastern")
  136. obj = frame_or_series(dates)
  137. res = obj.shift(0)
  138. tm.assert_equal(res, obj)
  139. assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]"
  140. res = obj.shift(1)
  141. exp_vals = [NaT] + dates.astype(object).values.tolist()[:9]
  142. exp = frame_or_series(exp_vals)
  143. tm.assert_equal(res, exp)
  144. assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]"
  145. res = obj.shift(-2)
  146. exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT]
  147. exp = frame_or_series(exp_vals)
  148. tm.assert_equal(res, exp)
  149. assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]"
  @pytest.mark.parametrize("ex", [10, -10, 20, -20])
  def test_shift_dst_beyond(self, frame_or_series, ex):
      """GH#13926: shifting by at least the length gives all-NaT, tz-aware dtype kept."""
      dates = date_range("2016-11-06", freq="h", periods=10, tz="US/Eastern")
      obj = frame_or_series(dates)

      shifted = obj.shift(ex)

      all_nat = frame_or_series([NaT] * 10, dtype="datetime64[ns, US/Eastern]")
      tm.assert_equal(shifted, all_nat)
      assert tm.get_dtype(shifted) == "datetime64[ns, US/Eastern]"
  159. def test_shift_by_zero(self, datetime_frame, frame_or_series):
  160. # shift by 0
  161. obj = tm.get_obj(datetime_frame, frame_or_series)
  162. unshifted = obj.shift(0)
  163. tm.assert_equal(unshifted, obj)
  164. def test_shift(self, datetime_frame):
  165. # naive shift
  166. ser = datetime_frame["A"]
  167. shifted = datetime_frame.shift(5)
  168. tm.assert_index_equal(shifted.index, datetime_frame.index)
  169. shifted_ser = ser.shift(5)
  170. tm.assert_series_equal(shifted["A"], shifted_ser)
  171. shifted = datetime_frame.shift(-5)
  172. tm.assert_index_equal(shifted.index, datetime_frame.index)
  173. shifted_ser = ser.shift(-5)
  174. tm.assert_series_equal(shifted["A"], shifted_ser)
  175. unshifted = datetime_frame.shift(5).shift(-5)
  176. tm.assert_numpy_array_equal(
  177. unshifted.dropna().values, datetime_frame.values[:-5]
  178. )
  179. unshifted_ser = ser.shift(5).shift(-5)
  180. tm.assert_numpy_array_equal(unshifted_ser.dropna().values, ser.values[:-5])
  181. def test_shift_by_offset(self, datetime_frame, frame_or_series):
  182. # shift by DateOffset
  183. obj = tm.get_obj(datetime_frame, frame_or_series)
  184. offset = offsets.BDay()
  185. shifted = obj.shift(5, freq=offset)
  186. assert len(shifted) == len(obj)
  187. unshifted = shifted.shift(-5, freq=offset)
  188. tm.assert_equal(unshifted, obj)
  189. shifted2 = obj.shift(5, freq="B")
  190. tm.assert_equal(shifted, shifted2)
  191. unshifted = obj.shift(0, freq=offset)
  192. tm.assert_equal(unshifted, obj)
  193. d = obj.index[0]
  194. shifted_d = d + offset * 5
  195. if frame_or_series is DataFrame:
  196. tm.assert_series_equal(obj.xs(d), shifted.xs(shifted_d), check_names=False)
  197. else:
  198. tm.assert_almost_equal(obj.at[d], shifted.at[shifted_d])
  def test_shift_with_periodindex(self, frame_or_series):
      """Shift an object indexed by a PeriodIndex, with and without ``freq``."""
      # Shifting with PeriodIndex
      base = DataFrame(
          np.arange(4, dtype=float), index=pd.period_range("2020-01-01", periods=4)
      )
      ps = tm.get_obj(base, frame_or_series)

      shifted = ps.shift(1)
      unshifted = shifted.shift(-1)
      # without freq the index is left as it was
      tm.assert_index_equal(shifted.index, ps.index)
      tm.assert_index_equal(unshifted.index, ps.index)

      if frame_or_series is DataFrame:
          roundtrip_vals = unshifted.iloc[:, 0].dropna().values
          original_vals = ps.iloc[:-1, 0].values
      else:
          roundtrip_vals = unshifted.dropna().values
          original_vals = ps.values[:-1]
      tm.assert_numpy_array_equal(roundtrip_vals, original_vals)

      # freq given positionally: alias string vs offset object
      by_alias = ps.shift(1, "D")
      by_offset = ps.shift(1, offsets.Day())
      tm.assert_equal(by_alias, by_offset)
      tm.assert_equal(ps, by_alias.shift(-1, "D"))

      msg = "does not match PeriodIndex freq"
      with pytest.raises(ValueError, match=msg):
          ps.shift(freq="W")

      # legacy support
      kw_alias = ps.shift(1, freq="D")
      tm.assert_equal(by_alias, kw_alias)

      kw_offset = ps.shift(1, freq=offsets.Day())
      tm.assert_equal(kw_offset, kw_alias)
  227. def test_shift_other_axis(self):
  228. # shift other axis
  229. # GH#6371
  230. df = DataFrame(np.random.default_rng(2).random((10, 5)))
  231. expected = pd.concat(
  232. [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
  233. ignore_index=True,
  234. axis=1,
  235. )
  236. result = df.shift(1, axis=1)
  237. tm.assert_frame_equal(result, expected)
  238. def test_shift_named_axis(self):
  239. # shift named axis
  240. df = DataFrame(np.random.default_rng(2).random((10, 5)))
  241. expected = pd.concat(
  242. [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
  243. ignore_index=True,
  244. axis=1,
  245. )
  246. result = df.shift(1, axis="columns")
  247. tm.assert_frame_equal(result, expected)
  248. def test_shift_other_axis_with_freq(self, datetime_frame):
  249. obj = datetime_frame.T
  250. offset = offsets.BDay()
  251. # GH#47039
  252. shifted = obj.shift(5, freq=offset, axis=1)
  253. assert len(shifted) == len(obj)
  254. unshifted = shifted.shift(-5, freq=offset, axis=1)
  255. tm.assert_equal(unshifted, obj)
  256. def test_shift_bool(self):
  257. df = DataFrame({"high": [True, False], "low": [False, False]})
  258. rs = df.shift(1)
  259. xp = DataFrame(
  260. np.array([[np.nan, np.nan], [True, False]], dtype=object),
  261. columns=["high", "low"],
  262. )
  263. tm.assert_frame_equal(rs, xp)
  264. def test_shift_categorical1(self, frame_or_series):
  265. # GH#9416
  266. obj = frame_or_series(["a", "b", "c", "d"], dtype="category")
  267. rt = obj.shift(1).shift(-1)
  268. tm.assert_equal(obj.iloc[:-1], rt.dropna())
  269. def get_cat_values(ndframe):
  270. # For Series we could just do ._values; for DataFrame
  271. # we may be able to do this if we ever have 2D Categoricals
  272. return ndframe._mgr.arrays[0]
  273. cat = get_cat_values(obj)
  274. sp1 = obj.shift(1)
  275. tm.assert_index_equal(obj.index, sp1.index)
  276. assert np.all(get_cat_values(sp1).codes[:1] == -1)
  277. assert np.all(cat.codes[:-1] == get_cat_values(sp1).codes[1:])
  278. sn2 = obj.shift(-2)
  279. tm.assert_index_equal(obj.index, sn2.index)
  280. assert np.all(get_cat_values(sn2).codes[-2:] == -1)
  281. assert np.all(cat.codes[2:] == get_cat_values(sn2).codes[:-2])
  282. tm.assert_index_equal(cat.categories, get_cat_values(sp1).categories)
  283. tm.assert_index_equal(cat.categories, get_cat_values(sn2).categories)
  284. def test_shift_categorical(self):
  285. # GH#9416
  286. s1 = Series(["a", "b", "c"], dtype="category")
  287. s2 = Series(["A", "B", "C"], dtype="category")
  288. df = DataFrame({"one": s1, "two": s2})
  289. rs = df.shift(1)
  290. xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)})
  291. tm.assert_frame_equal(rs, xp)
  def test_shift_categorical_fill_value(self, frame_or_series):
      """fill_value must be an existing category when shifting categorical data."""
      cats = frame_or_series(["a", "b", "c", "d"], dtype="category")
      shifted = cats.shift(1, fill_value="a")

      expected_values = pd.Categorical(
          ["a", "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False
      )
      tm.assert_equal(shifted, frame_or_series(expected_values))

      # check for incorrect fill_value
      msg = r"Cannot setitem on a Categorical with a new category \(f\)"
      with pytest.raises(TypeError, match=msg):
          cats.shift(1, fill_value="f")
  305. def test_shift_fill_value(self, frame_or_series):
  306. # GH#24128
  307. dti = date_range("1/1/2000", periods=5, freq="h")
  308. ts = frame_or_series([1.0, 2.0, 3.0, 4.0, 5.0], index=dti)
  309. exp = frame_or_series([0.0, 1.0, 2.0, 3.0, 4.0], index=dti)
  310. # check that fill value works
  311. result = ts.shift(1, fill_value=0.0)
  312. tm.assert_equal(result, exp)
  313. exp = frame_or_series([0.0, 0.0, 1.0, 2.0, 3.0], index=dti)
  314. result = ts.shift(2, fill_value=0.0)
  315. tm.assert_equal(result, exp)
  316. ts = frame_or_series([1, 2, 3])
  317. res = ts.shift(2, fill_value=0)
  318. assert tm.get_dtype(res) == tm.get_dtype(ts)
  319. # retain integer dtype
  320. obj = frame_or_series([1, 2, 3, 4, 5], index=dti)
  321. exp = frame_or_series([0, 1, 2, 3, 4], index=dti)
  322. result = obj.shift(1, fill_value=0)
  323. tm.assert_equal(result, exp)
  324. exp = frame_or_series([0, 0, 1, 2, 3], index=dti)
  325. result = obj.shift(2, fill_value=0)
  326. tm.assert_equal(result, exp)
  327. def test_shift_empty(self):
  328. # Regression test for GH#8019
  329. df = DataFrame({"foo": []})
  330. rs = df.shift(-1)
  331. tm.assert_frame_equal(df, rs)
  332. def test_shift_duplicate_columns(self):
  333. # GH#9092; verify that position-based shifting works
  334. # in the presence of duplicate columns
  335. column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]]
  336. data = np.random.default_rng(2).standard_normal((20, 5))
  337. shifted = []
  338. for columns in column_lists:
  339. df = DataFrame(data.copy(), columns=columns)
  340. for s in range(5):
  341. df.iloc[:, s] = df.iloc[:, s].shift(s + 1)
  342. df.columns = range(5)
  343. shifted.append(df)
  344. # sanity check the base case
  345. nulls = shifted[0].isna().sum()
  346. tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64"))
  347. # check all answers are the same
  348. tm.assert_frame_equal(shifted[0], shifted[1])
  349. tm.assert_frame_equal(shifted[0], shifted[2])
  350. def test_shift_axis1_multiple_blocks(self, using_array_manager):
  351. # GH#35488
  352. df1 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 3)))
  353. df2 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 2)))
  354. df3 = pd.concat([df1, df2], axis=1)
  355. if not using_array_manager:
  356. assert len(df3._mgr.blocks) == 2
  357. result = df3.shift(2, axis=1)
  358. expected = df3.take([-1, -1, 0, 1, 2], axis=1)
  359. # Explicit cast to float to avoid implicit cast when setting nan.
  360. # Column names aren't unique, so directly calling `expected.astype` won't work.
  361. expected = expected.pipe(
  362. lambda df: df.set_axis(range(df.shape[1]), axis=1)
  363. .astype({0: "float", 1: "float"})
  364. .set_axis(df.columns, axis=1)
  365. )
  366. expected.iloc[:, :2] = np.nan
  367. expected.columns = df3.columns
  368. tm.assert_frame_equal(result, expected)
  369. # Case with periods < 0
  370. # rebuild df3 because `take` call above consolidated
  371. df3 = pd.concat([df1, df2], axis=1)
  372. if not using_array_manager:
  373. assert len(df3._mgr.blocks) == 2
  374. result = df3.shift(-2, axis=1)
  375. expected = df3.take([2, 3, 4, -1, -1], axis=1)
  376. # Explicit cast to float to avoid implicit cast when setting nan.
  377. # Column names aren't unique, so directly calling `expected.astype` won't work.
  378. expected = expected.pipe(
  379. lambda df: df.set_axis(range(df.shape[1]), axis=1)
  380. .astype({3: "float", 4: "float"})
  381. .set_axis(df.columns, axis=1)
  382. )
  383. expected.iloc[:, -2:] = np.nan
  384. expected.columns = df3.columns
  385. tm.assert_frame_equal(result, expected)
  386. @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support
  387. def test_shift_axis1_multiple_blocks_with_int_fill(self):
  388. # GH#42719
  389. rng = np.random.default_rng(2)
  390. df1 = DataFrame(rng.integers(1000, size=(5, 3), dtype=int))
  391. df2 = DataFrame(rng.integers(1000, size=(5, 2), dtype=int))
  392. df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
  393. result = df3.shift(2, axis=1, fill_value=np.int_(0))
  394. assert len(df3._mgr.blocks) == 2
  395. expected = df3.take([-1, -1, 0, 1], axis=1)
  396. expected.iloc[:, :2] = np.int_(0)
  397. expected.columns = df3.columns
  398. tm.assert_frame_equal(result, expected)
  399. # Case with periods < 0
  400. df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
  401. result = df3.shift(-2, axis=1, fill_value=np.int_(0))
  402. assert len(df3._mgr.blocks) == 2
  403. expected = df3.take([2, 3, -1, -1], axis=1)
  404. expected.iloc[:, -2:] = np.int_(0)
  405. expected.columns = df3.columns
  406. tm.assert_frame_equal(result, expected)
  407. def test_period_index_frame_shift_with_freq(self, frame_or_series):
  408. ps = DataFrame(range(4), index=pd.period_range("2020-01-01", periods=4))
  409. ps = tm.get_obj(ps, frame_or_series)
  410. shifted = ps.shift(1, freq="infer")
  411. unshifted = shifted.shift(-1, freq="infer")
  412. tm.assert_equal(unshifted, ps)
  413. shifted2 = ps.shift(freq="D")
  414. tm.assert_equal(shifted, shifted2)
  415. shifted3 = ps.shift(freq=offsets.Day())
  416. tm.assert_equal(shifted, shifted3)
  417. def test_datetime_frame_shift_with_freq(self, datetime_frame, frame_or_series):
  418. dtobj = tm.get_obj(datetime_frame, frame_or_series)
  419. shifted = dtobj.shift(1, freq="infer")
  420. unshifted = shifted.shift(-1, freq="infer")
  421. tm.assert_equal(dtobj, unshifted)
  422. shifted2 = dtobj.shift(freq=dtobj.index.freq)
  423. tm.assert_equal(shifted, shifted2)
  424. inferred_ts = DataFrame(
  425. datetime_frame.values,
  426. Index(np.asarray(datetime_frame.index)),
  427. columns=datetime_frame.columns,
  428. )
  429. inferred_ts = tm.get_obj(inferred_ts, frame_or_series)
  430. shifted = inferred_ts.shift(1, freq="infer")
  431. expected = dtobj.shift(1, freq="infer")
  432. expected.index = expected.index._with_freq(None)
  433. tm.assert_equal(shifted, expected)
  434. unshifted = shifted.shift(-1, freq="infer")
  435. tm.assert_equal(unshifted, inferred_ts)
  def test_period_index_frame_shift_with_freq_error(self, frame_or_series):
      """A ``freq`` that differs from the PeriodIndex freq raises ValueError."""
      base = DataFrame(range(4), index=pd.period_range("2020-01-01", periods=4))
      daily = tm.get_obj(base, frame_or_series)

      with pytest.raises(
          ValueError, match="Given freq M does not match PeriodIndex freq D"
      ):
          daily.shift(freq="M")
  def test_datetime_frame_shift_with_freq_error(
      self, datetime_frame, frame_or_series
  ):
      """``freq="infer"`` raises ValueError on an irregular selection of rows."""
      dtobj = tm.get_obj(datetime_frame, frame_or_series)
      # rows 0, 5 and 7 only
      irregular = dtobj.iloc[[0, 5, 7]]

      with pytest.raises(
          ValueError, match="Freq was not set in the index hence cannot be inferred"
      ):
          irregular.shift(freq="infer")
  def test_shift_dt64values_int_fill_deprecated(self):
      """GH#31971: integer fill_value with datetime64 values, axis=0 and axis=1."""
      stamps = Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")])

      # default axis: an integer fill raises for both Series and DataFrame
      with pytest.raises(TypeError, match="value should be a"):
          stamps.shift(1, fill_value=0)

      one_col = stamps.to_frame()
      with pytest.raises(TypeError, match="value should be a"):
          one_col.shift(1, fill_value=0)

      # axis = 1
      consolidated = DataFrame({"A": stamps, "B": stamps})
      consolidated._consolidate_inplace()

      result = consolidated.shift(1, axis=1, fill_value=0)
      expected = DataFrame({"A": [0, 0], "B": consolidated["A"]})
      tm.assert_frame_equal(result, expected)

      # same thing but not consolidated; pre-2.0 we got different behavior
      separate = DataFrame({"A": stamps})
      separate["B"] = stamps
      assert len(separate._mgr.arrays) == 2
      result = separate.shift(1, axis=1, fill_value=0)
      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize(
      "as_cat",
      [
          pytest.param(
              True,
              marks=pytest.mark.xfail(
                  reason="_can_hold_element incorrectly always returns True"
              ),
          ),
          False,
      ],
  )
  @pytest.mark.parametrize(
      "vals",
      [
          date_range("2020-01-01", periods=2),
          date_range("2020-01-01", periods=2, tz="US/Pacific"),
          pd.period_range("2020-01-01", periods=2, freq="D"),
          pd.timedelta_range("2020 Days", periods=2, freq="D"),
          pd.interval_range(0, 3, periods=2),
          pytest.param(
              pd.array([1, 2], dtype="Int64"),
              marks=pytest.mark.xfail(
                  reason="_can_hold_element incorrectly always returns True"
              ),
          ),
          pytest.param(
              pd.array([1, 2], dtype="Float32"),
              marks=pytest.mark.xfail(
                  reason="_can_hold_element incorrectly always returns True"
              ),
          ),
      ],
      ids=lambda x: str(x.dtype),
  )
  def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat):
      """GH#44564: axis=1 shift with a string fill_value the values' dtype can't hold."""
      column = Series(vals).astype("category") if as_cat else Series(vals)

      # single column: everything is replaced by the fill value
      single = DataFrame({"A": column})
      result = single.shift(-1, axis=1, fill_value="foo")
      tm.assert_frame_equal(result, DataFrame({"A": ["foo", "foo"]}))

      # same thing but multiple blocks
      consolidated = DataFrame({"A": column, "B": column})
      consolidated._consolidate_inplace()

      result = consolidated.shift(-1, axis=1, fill_value="foo")
      expected = DataFrame({"A": consolidated["B"], "B": ["foo", "foo"]})
      tm.assert_frame_equal(result, expected)

      # same thing but not consolidated
      separate = DataFrame({"A": column})
      separate["B"] = column
      assert len(separate._mgr.arrays) == 2
      result = separate.shift(-1, axis=1, fill_value="foo")
      tm.assert_frame_equal(result, expected)
  526. def test_shift_axis1_categorical_columns(self):
  527. # GH#38434
  528. ci = CategoricalIndex(["a", "b", "c"])
  529. df = DataFrame(
  530. {"a": [1, 3], "b": [2, 4], "c": [5, 6]}, index=ci[:-1], columns=ci
  531. )
  532. result = df.shift(axis=1)
  533. expected = DataFrame(
  534. {"a": [np.nan, np.nan], "b": [1, 3], "c": [2, 4]}, index=ci[:-1], columns=ci
  535. )
  536. tm.assert_frame_equal(result, expected)
  537. # periods != 1
  538. result = df.shift(2, axis=1)
  539. expected = DataFrame(
  540. {"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 3]},
  541. index=ci[:-1],
  542. columns=ci,
  543. )
  544. tm.assert_frame_equal(result, expected)
  545. def test_shift_axis1_many_periods(self):
  546. # GH#44978 periods > len(columns)
  547. df = DataFrame(np.random.default_rng(2).random((5, 3)))
  548. shifted = df.shift(6, axis=1, fill_value=None)
  549. expected = df * np.nan
  550. tm.assert_frame_equal(shifted, expected)
  551. shifted2 = df.shift(-6, axis=1, fill_value=None)
  552. tm.assert_frame_equal(shifted2, expected)
  553. def test_shift_with_offsets_freq(self):
  554. df = DataFrame({"x": [1, 2, 3]}, index=date_range("2000", periods=3))
  555. shifted = df.shift(freq="1MS")
  556. expected = DataFrame(
  557. {"x": [1, 2, 3]},
  558. index=date_range(start="02/01/2000", end="02/01/2000", periods=3),
  559. )
  560. tm.assert_frame_equal(shifted, expected)
  561. def test_shift_with_iterable_basic_functionality(self):
  562. # GH#44424
  563. data = {"a": [1, 2, 3], "b": [4, 5, 6]}
  564. shifts = [0, 1, 2]
  565. df = DataFrame(data)
  566. shifted = df.shift(shifts)
  567. expected = DataFrame(
  568. {
  569. "a_0": [1, 2, 3],
  570. "b_0": [4, 5, 6],
  571. "a_1": [np.nan, 1.0, 2.0],
  572. "b_1": [np.nan, 4.0, 5.0],
  573. "a_2": [np.nan, np.nan, 1.0],
  574. "b_2": [np.nan, np.nan, 4.0],
  575. }
  576. )
  577. tm.assert_frame_equal(expected, shifted)
  578. def test_shift_with_iterable_series(self):
  579. # GH#44424
  580. data = {"a": [1, 2, 3]}
  581. shifts = [0, 1, 2]
  582. df = DataFrame(data)
  583. s = df["a"]
  584. tm.assert_frame_equal(s.shift(shifts), df.shift(shifts))
  585. def test_shift_with_iterable_freq_and_fill_value(self):
  586. # GH#44424
  587. df = DataFrame(
  588. np.random.default_rng(2).standard_normal(5),
  589. index=date_range("1/1/2000", periods=5, freq="h"),
  590. )
  591. tm.assert_frame_equal(
  592. # rename because shift with an iterable leads to str column names
  593. df.shift([1], fill_value=1).rename(columns=lambda x: int(x[0])),
  594. df.shift(1, fill_value=1),
  595. )
  596. tm.assert_frame_equal(
  597. df.shift([1], freq="h").rename(columns=lambda x: int(x[0])),
  598. df.shift(1, freq="h"),
  599. )
  600. msg = (
  601. "Passing a 'freq' together with a 'fill_value' silently ignores the "
  602. "fill_value"
  603. )
  604. with tm.assert_produces_warning(FutureWarning, match=msg):
  605. df.shift([1, 2], fill_value=1, freq="h")
  def test_shift_with_iterable_check_other_arguments(self):
      """GH#44424: ``suffix`` handling and argument validation for list periods."""
      frame = DataFrame({"a": [1, 2], "b": [4, 5]})
      periods = [0, 1]

      # test suffix
      with_suffix = frame[["a"]].shift(periods, suffix="_suffix")
      expected = DataFrame({"a_suffix_0": [1, 2], "a_suffix_1": [np.nan, 1.0]})
      tm.assert_frame_equal(with_suffix, expected)

      # check bad inputs when doing multiple shifts
      # each entry: (exception type, message pattern, call that should raise)
      bad_calls = [
          (
              ValueError,
              "If `periods` contains multiple shifts, `axis` cannot be 1.",
              lambda: frame.shift(periods, axis=1),
          ),
          (
              TypeError,
              "Periods must be integer, but s is <class 'str'>.",
              lambda: frame.shift(["s"]),
          ),
          (
              ValueError,
              "If `periods` is an iterable, it cannot be empty.",
              lambda: frame.shift([]),
          ),
          (
              ValueError,
              "Cannot specify `suffix` if `periods` is an int.",
              lambda: frame.shift(1, suffix="fails"),
          ),
      ]
      for exc_type, msg, call in bad_calls:
          with pytest.raises(exc_type, match=msg):
              call()
  628. def test_shift_axis_one_empty(self):
  629. # GH#57301
  630. df = DataFrame()
  631. result = df.shift(1, axis=1)
  632. tm.assert_frame_equal(result, df)