# Source file: test_partial.py
  1. """
  2. test setting *parts* of objects both positionally and label based
  3. TODO: these should be split among the indexer tests
  4. """
  5. import numpy as np
  6. import pytest
  7. import pandas as pd
  8. from pandas import (
  9. DataFrame,
  10. Index,
  11. Period,
  12. Series,
  13. Timestamp,
  14. date_range,
  15. period_range,
  16. )
  17. import pandas._testing as tm
  18. class TestEmptyFrameSetitemExpansion:
  19. def test_empty_frame_setitem_index_name_retained(self):
  20. # GH#31368 empty frame has non-None index.name -> retained
  21. df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
  22. series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
  23. df["series"] = series
  24. expected = DataFrame(
  25. {"series": [1.23] * 4},
  26. index=pd.RangeIndex(4, name="df_index"),
  27. columns=Index(["series"]),
  28. )
  29. tm.assert_frame_equal(df, expected)
  30. def test_empty_frame_setitem_index_name_inherited(self):
  31. # GH#36527 empty frame has None index.name -> not retained
  32. df = DataFrame()
  33. series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
  34. df["series"] = series
  35. expected = DataFrame(
  36. {"series": [1.23] * 4},
  37. index=pd.RangeIndex(4, name="series_index"),
  38. columns=Index(["series"]),
  39. )
  40. tm.assert_frame_equal(df, expected)
  41. def test_loc_setitem_zerolen_series_columns_align(self):
  42. # columns will align
  43. df = DataFrame(columns=["A", "B"])
  44. df.loc[0] = Series(1, index=range(4))
  45. expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
  46. tm.assert_frame_equal(df, expected)
  47. # columns will align
  48. df = DataFrame(columns=["A", "B"])
  49. df.loc[0] = Series(1, index=["B"])
  50. exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
  51. tm.assert_frame_equal(df, exp)
  52. def test_loc_setitem_zerolen_list_length_must_match_columns(self):
  53. # list-like must conform
  54. df = DataFrame(columns=["A", "B"])
  55. msg = "cannot set a row with mismatched columns"
  56. with pytest.raises(ValueError, match=msg):
  57. df.loc[0] = [1, 2, 3]
  58. df = DataFrame(columns=["A", "B"])
  59. df.loc[3] = [6, 7] # length matches len(df.columns) --> OK!
  60. exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64)
  61. tm.assert_frame_equal(df, exp)
  62. def test_partial_set_empty_frame(self):
  63. # partially set with an empty object
  64. # frame
  65. df = DataFrame()
  66. msg = "cannot set a frame with no defined columns"
  67. with pytest.raises(ValueError, match=msg):
  68. df.loc[1] = 1
  69. with pytest.raises(ValueError, match=msg):
  70. df.loc[1] = Series([1], index=["foo"])
  71. msg = "cannot set a frame with no defined index and a scalar"
  72. with pytest.raises(ValueError, match=msg):
  73. df.loc[:, 1] = 1
  74. def test_partial_set_empty_frame2(self):
  75. # these work as they don't really change
  76. # anything but the index
  77. # GH#5632
  78. expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="object"))
  79. df = DataFrame(index=Index([], dtype="object"))
  80. df["foo"] = Series([], dtype="object")
  81. tm.assert_frame_equal(df, expected)
  82. df = DataFrame(index=Index([]))
  83. df["foo"] = Series(df.index)
  84. tm.assert_frame_equal(df, expected)
  85. df = DataFrame(index=Index([]))
  86. df["foo"] = df.index
  87. tm.assert_frame_equal(df, expected)
  88. def test_partial_set_empty_frame3(self):
  89. expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64"))
  90. expected["foo"] = expected["foo"].astype("float64")
  91. df = DataFrame(index=Index([], dtype="int64"))
  92. df["foo"] = []
  93. tm.assert_frame_equal(df, expected)
  94. df = DataFrame(index=Index([], dtype="int64"))
  95. df["foo"] = Series(np.arange(len(df)), dtype="float64")
  96. tm.assert_frame_equal(df, expected)
  97. def test_partial_set_empty_frame4(self):
  98. df = DataFrame(index=Index([], dtype="int64"))
  99. df["foo"] = range(len(df))
  100. expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64"))
  101. # range is int-dtype-like, so we get int64 dtype
  102. expected["foo"] = expected["foo"].astype("int64")
  103. tm.assert_frame_equal(df, expected)
  104. def test_partial_set_empty_frame5(self):
  105. df = DataFrame()
  106. tm.assert_index_equal(df.columns, pd.RangeIndex(0))
  107. df2 = DataFrame()
  108. df2[1] = Series([1], index=["foo"])
  109. df.loc[:, 1] = Series([1], index=["foo"])
  110. tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
  111. tm.assert_frame_equal(df, df2)
  112. def test_partial_set_empty_frame_no_index(self):
  113. # no index to start
  114. expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
  115. df = DataFrame(columns=["A", "B"])
  116. df[0] = Series(1, index=range(4))
  117. tm.assert_frame_equal(df, expected)
  118. df = DataFrame(columns=["A", "B"])
  119. df.loc[:, 0] = Series(1, index=range(4))
  120. tm.assert_frame_equal(df, expected)
  121. def test_partial_set_empty_frame_row(self):
  122. # GH#5720, GH#5744
  123. # don't create rows when empty
  124. expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
  125. expected["A"] = expected["A"].astype("int64")
  126. expected["B"] = expected["B"].astype("float64")
  127. expected["New"] = expected["New"].astype("float64")
  128. df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
  129. y = df[df.A > 5]
  130. y["New"] = np.nan
  131. tm.assert_frame_equal(y, expected)
  132. expected = DataFrame(columns=["a", "b", "c c", "d"])
  133. expected["d"] = expected["d"].astype("int64")
  134. df = DataFrame(columns=["a", "b", "c c"])
  135. df["d"] = 3
  136. tm.assert_frame_equal(df, expected)
  137. tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
  138. # reindex columns is ok
  139. df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
  140. y = df[df.A > 5]
  141. result = y.reindex(columns=["A", "B", "C"])
  142. expected = DataFrame(columns=["A", "B", "C"])
  143. expected["A"] = expected["A"].astype("int64")
  144. expected["B"] = expected["B"].astype("float64")
  145. expected["C"] = expected["C"].astype("float64")
  146. tm.assert_frame_equal(result, expected)
  147. def test_partial_set_empty_frame_set_series(self):
  148. # GH#5756
  149. # setting with empty Series
  150. df = DataFrame(Series(dtype=object))
  151. expected = DataFrame({0: Series(dtype=object)})
  152. tm.assert_frame_equal(df, expected)
  153. df = DataFrame(Series(name="foo", dtype=object))
  154. expected = DataFrame({"foo": Series(dtype=object)})
  155. tm.assert_frame_equal(df, expected)
  156. def test_partial_set_empty_frame_empty_copy_assignment(self):
  157. # GH#5932
  158. # copy on empty with assignment fails
  159. df = DataFrame(index=[0])
  160. df = df.copy()
  161. df["a"] = 0
  162. expected = DataFrame(0, index=[0], columns=Index(["a"]))
  163. tm.assert_frame_equal(df, expected)
  164. def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
  165. # GH#6171
  166. # consistency on empty frames
  167. df = DataFrame(columns=["x", "y"])
  168. df["x"] = [1, 2]
  169. expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
  170. tm.assert_frame_equal(df, expected, check_dtype=False)
  171. df = DataFrame(columns=["x", "y"])
  172. df["x"] = ["1", "2"]
  173. expected = DataFrame(
  174. {
  175. "x": Series(
  176. ["1", "2"],
  177. dtype=object if not using_infer_string else "str",
  178. ),
  179. "y": Series([np.nan, np.nan], dtype=object),
  180. }
  181. )
  182. tm.assert_frame_equal(df, expected)
  183. df = DataFrame(columns=["x", "y"])
  184. df.loc[0, "x"] = 1
  185. expected = DataFrame({"x": [1], "y": [np.nan]})
  186. tm.assert_frame_equal(df, expected, check_dtype=False)
  187. class TestPartialSetting:
  188. def test_partial_setting(self):
  189. # GH2578, allow ix and friends to partially set
  190. # series
  191. s_orig = Series([1, 2, 3])
  192. s = s_orig.copy()
  193. s[5] = 5
  194. expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
  195. tm.assert_series_equal(s, expected)
  196. s = s_orig.copy()
  197. s.loc[5] = 5
  198. expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
  199. tm.assert_series_equal(s, expected)
  200. s = s_orig.copy()
  201. s[5] = 5.0
  202. expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
  203. tm.assert_series_equal(s, expected)
  204. s = s_orig.copy()
  205. s.loc[5] = 5.0
  206. expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
  207. tm.assert_series_equal(s, expected)
  208. # iloc/iat raise
  209. s = s_orig.copy()
  210. msg = "iloc cannot enlarge its target object"
  211. with pytest.raises(IndexError, match=msg):
  212. s.iloc[3] = 5.0
  213. msg = "index 3 is out of bounds for axis 0 with size 3"
  214. with pytest.raises(IndexError, match=msg):
  215. s.iat[3] = 5.0
  216. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  217. def test_partial_setting_frame(self, using_array_manager):
  218. df_orig = DataFrame(
  219. np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
  220. )
  221. # iloc/iat raise
  222. df = df_orig.copy()
  223. msg = "iloc cannot enlarge its target object"
  224. with pytest.raises(IndexError, match=msg):
  225. df.iloc[4, 2] = 5.0
  226. msg = "index 2 is out of bounds for axis 0 with size 2"
  227. if using_array_manager:
  228. msg = "list index out of range"
  229. with pytest.raises(IndexError, match=msg):
  230. df.iat[4, 2] = 5.0
  231. # row setting where it exists
  232. expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
  233. df = df_orig.copy()
  234. df.iloc[1] = df.iloc[2]
  235. tm.assert_frame_equal(df, expected)
  236. expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
  237. df = df_orig.copy()
  238. df.loc[1] = df.loc[2]
  239. tm.assert_frame_equal(df, expected)
  240. # like 2578, partial setting with dtype preservation
  241. expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
  242. df = df_orig.copy()
  243. df.loc[3] = df.loc[2]
  244. tm.assert_frame_equal(df, expected)
  245. # single dtype frame, overwrite
  246. expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
  247. df = df_orig.copy()
  248. df.loc[:, "B"] = df.loc[:, "A"]
  249. tm.assert_frame_equal(df, expected)
  250. # mixed dtype frame, overwrite
  251. expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})
  252. df = df_orig.copy()
  253. df["B"] = df["B"].astype(np.float64)
  254. # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
  255. # setting inplace
  256. df.loc[:, "B"] = df.loc[:, "A"]
  257. tm.assert_frame_equal(df, expected)
  258. # single dtype frame, partial setting
  259. expected = df_orig.copy()
  260. expected["C"] = df["A"]
  261. df = df_orig.copy()
  262. df.loc[:, "C"] = df.loc[:, "A"]
  263. tm.assert_frame_equal(df, expected)
  264. # mixed frame, partial setting
  265. expected = df_orig.copy()
  266. expected["C"] = df["A"]
  267. df = df_orig.copy()
  268. df.loc[:, "C"] = df.loc[:, "A"]
  269. tm.assert_frame_equal(df, expected)
  270. def test_partial_setting2(self):
  271. # GH 8473
  272. dates = date_range("1/1/2000", periods=8)
  273. df_orig = DataFrame(
  274. np.random.default_rng(2).standard_normal((8, 4)),
  275. index=dates,
  276. columns=["A", "B", "C", "D"],
  277. )
  278. expected = pd.concat(
  279. [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True
  280. )
  281. df = df_orig.copy()
  282. df.loc[dates[-1] + dates.freq, "A"] = 7
  283. tm.assert_frame_equal(df, expected)
  284. df = df_orig.copy()
  285. df.at[dates[-1] + dates.freq, "A"] = 7
  286. tm.assert_frame_equal(df, expected)
  287. exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
  288. expected = pd.concat([df_orig, exp_other], axis=1)
  289. df = df_orig.copy()
  290. df.loc[dates[-1] + dates.freq, 0] = 7
  291. tm.assert_frame_equal(df, expected)
  292. df = df_orig.copy()
  293. df.at[dates[-1] + dates.freq, 0] = 7
  294. tm.assert_frame_equal(df, expected)
  295. def test_partial_setting_mixed_dtype(self):
  296. # in a mixed dtype environment, try to preserve dtypes
  297. # by appending
  298. df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
  299. s = df.loc[1].copy()
  300. s.name = 2
  301. expected = pd.concat([df, DataFrame(s).T.infer_objects()])
  302. df.loc[2] = df.loc[1]
  303. tm.assert_frame_equal(df, expected)
  304. def test_series_partial_set(self):
  305. # partial set with new index
  306. # Regression from GH4825
  307. ser = Series([0.1, 0.2], index=[1, 2])
  308. # loc equiv to .reindex
  309. expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
  310. with pytest.raises(KeyError, match=r"not in index"):
  311. ser.loc[[3, 2, 3]]
  312. result = ser.reindex([3, 2, 3])
  313. tm.assert_series_equal(result, expected, check_index_type=True)
  314. expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
  315. with pytest.raises(KeyError, match="not in index"):
  316. ser.loc[[3, 2, 3, "x"]]
  317. result = ser.reindex([3, 2, 3, "x"])
  318. tm.assert_series_equal(result, expected, check_index_type=True)
  319. expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
  320. result = ser.loc[[2, 2, 1]]
  321. tm.assert_series_equal(result, expected, check_index_type=True)
  322. expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
  323. with pytest.raises(KeyError, match="not in index"):
  324. ser.loc[[2, 2, "x", 1]]
  325. result = ser.reindex([2, 2, "x", 1])
  326. tm.assert_series_equal(result, expected, check_index_type=True)
  327. # raises as nothing is in the index
  328. msg = (
  329. rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] "
  330. r"are in the \[index\]\""
  331. )
  332. with pytest.raises(KeyError, match=msg):
  333. ser.loc[[3, 3, 3]]
  334. expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
  335. with pytest.raises(KeyError, match="not in index"):
  336. ser.loc[[2, 2, 3]]
  337. result = ser.reindex([2, 2, 3])
  338. tm.assert_series_equal(result, expected, check_index_type=True)
  339. s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
  340. expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
  341. with pytest.raises(KeyError, match="not in index"):
  342. s.loc[[3, 4, 4]]
  343. result = s.reindex([3, 4, 4])
  344. tm.assert_series_equal(result, expected, check_index_type=True)
  345. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  346. expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
  347. with pytest.raises(KeyError, match="not in index"):
  348. s.loc[[5, 3, 3]]
  349. result = s.reindex([5, 3, 3])
  350. tm.assert_series_equal(result, expected, check_index_type=True)
  351. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  352. expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
  353. with pytest.raises(KeyError, match="not in index"):
  354. s.loc[[5, 4, 4]]
  355. result = s.reindex([5, 4, 4])
  356. tm.assert_series_equal(result, expected, check_index_type=True)
  357. s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
  358. expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
  359. with pytest.raises(KeyError, match="not in index"):
  360. s.loc[[7, 2, 2]]
  361. result = s.reindex([7, 2, 2])
  362. tm.assert_series_equal(result, expected, check_index_type=True)
  363. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  364. expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
  365. with pytest.raises(KeyError, match="not in index"):
  366. s.loc[[4, 5, 5]]
  367. result = s.reindex([4, 5, 5])
  368. tm.assert_series_equal(result, expected, check_index_type=True)
  369. # iloc
  370. expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
  371. result = ser.iloc[[1, 1, 0, 0]]
  372. tm.assert_series_equal(result, expected, check_index_type=True)
  373. def test_series_partial_set_with_name(self):
  374. # GH 11497
  375. idx = Index([1, 2], dtype="int64", name="idx")
  376. ser = Series([0.1, 0.2], index=idx, name="s")
  377. # loc
  378. with pytest.raises(KeyError, match=r"\[3\] not in index"):
  379. ser.loc[[3, 2, 3]]
  380. with pytest.raises(KeyError, match=r"not in index"):
  381. ser.loc[[3, 2, 3, "x"]]
  382. exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
  383. expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
  384. result = ser.loc[[2, 2, 1]]
  385. tm.assert_series_equal(result, expected, check_index_type=True)
  386. with pytest.raises(KeyError, match=r"\['x'\] not in index"):
  387. ser.loc[[2, 2, "x", 1]]
  388. # raises as nothing is in the index
  389. msg = (
  390. rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', "
  391. r"name='idx'\)\] are in the \[index\]\""
  392. )
  393. with pytest.raises(KeyError, match=msg):
  394. ser.loc[[3, 3, 3]]
  395. with pytest.raises(KeyError, match="not in index"):
  396. ser.loc[[2, 2, 3]]
  397. idx = Index([1, 2, 3], dtype="int64", name="idx")
  398. with pytest.raises(KeyError, match="not in index"):
  399. Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]
  400. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  401. with pytest.raises(KeyError, match="not in index"):
  402. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]
  403. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  404. with pytest.raises(KeyError, match="not in index"):
  405. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]
  406. idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
  407. with pytest.raises(KeyError, match="not in index"):
  408. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]
  409. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  410. with pytest.raises(KeyError, match="not in index"):
  411. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]
  412. # iloc
  413. exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
  414. expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
  415. result = ser.iloc[[1, 1, 0, 0]]
  416. tm.assert_series_equal(result, expected, check_index_type=True)
  417. @pytest.mark.parametrize("key", [100, 100.0])
  418. def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
  419. # GH#4940 inserting non-strings
  420. orig = DataFrame(
  421. np.random.default_rng(2).standard_normal((10, 4)),
  422. columns=Index(list("ABCD"), dtype=object),
  423. index=date_range("2000-01-01", periods=10, freq="B"),
  424. )
  425. df = orig.copy()
  426. df.loc[key, :] = df.iloc[0]
  427. ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name)
  428. ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0)
  429. expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)
  430. tm.assert_frame_equal(df, expected)
  431. def test_partial_set_invalid(self):
  432. # GH 4940
  433. # allow only setting of 'valid' values
  434. orig = DataFrame(
  435. np.random.default_rng(2).standard_normal((10, 4)),
  436. columns=Index(list("ABCD"), dtype=object),
  437. index=date_range("2000-01-01", periods=10, freq="B"),
  438. )
  439. # allow object conversion here
  440. df = orig.copy()
  441. df.loc["a", :] = df.iloc[0]
  442. ser = Series(df.iloc[0], name="a")
  443. exp = pd.concat([orig, DataFrame(ser).T.infer_objects()])
  444. tm.assert_frame_equal(df, exp)
  445. tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
  446. assert df.index.dtype == "object"
  447. @pytest.mark.parametrize(
  448. "idx,labels,expected_idx",
  449. [
  450. (
  451. period_range(start="2000", periods=20, freq="D"),
  452. ["2000-01-04", "2000-01-08", "2000-01-12"],
  453. [
  454. Period("2000-01-04", freq="D"),
  455. Period("2000-01-08", freq="D"),
  456. Period("2000-01-12", freq="D"),
  457. ],
  458. ),
  459. (
  460. date_range(start="2000", periods=20, freq="D"),
  461. ["2000-01-04", "2000-01-08", "2000-01-12"],
  462. [
  463. Timestamp("2000-01-04"),
  464. Timestamp("2000-01-08"),
  465. Timestamp("2000-01-12"),
  466. ],
  467. ),
  468. (
  469. pd.timedelta_range(start="1 day", periods=20),
  470. ["4D", "8D", "12D"],
  471. [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
  472. ),
  473. ],
  474. )
  475. def test_loc_with_list_of_strings_representing_datetimes(
  476. self, idx, labels, expected_idx, frame_or_series
  477. ):
  478. # GH 11278
  479. obj = frame_or_series(range(20), index=idx)
  480. expected_value = [3, 7, 11]
  481. expected = frame_or_series(expected_value, expected_idx)
  482. tm.assert_equal(expected, obj.loc[labels])
  483. if frame_or_series is Series:
  484. tm.assert_series_equal(expected, obj[labels])
  485. @pytest.mark.parametrize(
  486. "idx,labels",
  487. [
  488. (
  489. period_range(start="2000", periods=20, freq="D"),
  490. ["2000-01-04", "2000-01-30"],
  491. ),
  492. (
  493. date_range(start="2000", periods=20, freq="D"),
  494. ["2000-01-04", "2000-01-30"],
  495. ),
  496. (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
  497. ],
  498. )
  499. def test_loc_with_list_of_strings_representing_datetimes_missing_value(
  500. self, idx, labels
  501. ):
  502. # GH 11278
  503. ser = Series(range(20), index=idx)
  504. df = DataFrame(range(20), index=idx)
  505. msg = r"not in index"
  506. with pytest.raises(KeyError, match=msg):
  507. ser.loc[labels]
  508. with pytest.raises(KeyError, match=msg):
  509. ser[labels]
  510. with pytest.raises(KeyError, match=msg):
  511. df.loc[labels]
  512. @pytest.mark.parametrize(
  513. "idx,labels,msg",
  514. [
  515. (
  516. period_range(start="2000", periods=20, freq="D"),
  517. Index(["4D", "8D"], dtype=object),
  518. (
  519. r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
  520. r"are in the \[index\]"
  521. ),
  522. ),
  523. (
  524. date_range(start="2000", periods=20, freq="D"),
  525. Index(["4D", "8D"], dtype=object),
  526. (
  527. r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
  528. r"are in the \[index\]"
  529. ),
  530. ),
  531. (
  532. pd.timedelta_range(start="1 day", periods=20),
  533. Index(["2000-01-04", "2000-01-08"], dtype=object),
  534. (
  535. r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
  536. r"dtype='object'\)\] are in the \[index\]"
  537. ),
  538. ),
  539. ],
  540. )
  541. def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
  542. self, idx, labels, msg
  543. ):
  544. # GH 11278
  545. ser = Series(range(20), index=idx)
  546. df = DataFrame(range(20), index=idx)
  547. with pytest.raises(KeyError, match=msg):
  548. ser.loc[labels]
  549. with pytest.raises(KeyError, match=msg):
  550. ser[labels]
  551. with pytest.raises(KeyError, match=msg):
  552. df.loc[labels]
  553. class TestStringSlicing:
  554. def test_slice_irregular_datetime_index_with_nan(self):
  555. # GH36953
  556. index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
  557. df = DataFrame(range(len(index)), index=index)
  558. expected = DataFrame(range(len(index[:3])), index=index[:3])
  559. with pytest.raises(KeyError, match="non-existing keys is not allowed"):
  560. # Upper bound is not in index (which is unordered)
  561. # GH53983
  562. # GH37819
  563. df["2012-01-01":"2012-01-04"]
  564. # Need this precision for right bound since the right slice
  565. # bound is "rounded" up to the largest timepoint smaller than
  566. # the next "resolution"-step of the provided point.
  567. # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
  568. result = df["2012-01-01":"2012-01-03 00:00:00.000000000"]
  569. tm.assert_frame_equal(result, expected)