# test_arithmetic.py (72 KB)
  1. from collections import deque
  2. from datetime import (
  3. datetime,
  4. timezone,
  5. )
  6. from enum import Enum
  7. import functools
  8. import operator
  9. import re
  10. import numpy as np
  11. import pytest
  12. from pandas.compat import HAS_PYARROW
  13. import pandas.util._test_decorators as td
  14. import pandas as pd
  15. from pandas import (
  16. DataFrame,
  17. Index,
  18. MultiIndex,
  19. Series,
  20. )
  21. import pandas._testing as tm
  22. from pandas.core.computation import expressions as expr
  23. from pandas.tests.frame.common import (
  24. _check_mixed_float,
  25. _check_mixed_int,
  26. )
  27. @pytest.fixture
  28. def simple_frame():
  29. """
  30. Fixture for simple 3x3 DataFrame
  31. Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].
  32. one two three
  33. a 1.0 2.0 3.0
  34. b 4.0 5.0 6.0
  35. c 7.0 8.0 9.0
  36. """
  37. arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
  38. return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"])
  39. @pytest.fixture(autouse=True, params=[0, 100], ids=["numexpr", "python"])
  40. def switch_numexpr_min_elements(request, monkeypatch):
  41. with monkeypatch.context() as m:
  42. m.setattr(expr, "_MIN_ELEMENTS", request.param)
  43. yield request.param
  44. class DummyElement:
  45. def __init__(self, value, dtype) -> None:
  46. self.value = value
  47. self.dtype = np.dtype(dtype)
  48. def __array__(self, dtype=None, copy=None):
  49. return np.array(self.value, dtype=self.dtype)
  50. def __str__(self) -> str:
  51. return f"DummyElement({self.value}, {self.dtype})"
  52. def __repr__(self) -> str:
  53. return str(self)
  54. def astype(self, dtype, copy=False):
  55. self.dtype = dtype
  56. return self
  57. def view(self, dtype):
  58. return type(self)(self.value.view(dtype), dtype)
  59. def any(self, axis=None):
  60. return bool(self.value)
# -------------------------------------------------------------------
# Comparisons
  63. class TestFrameComparisons:
  64. # Specifically _not_ flex-comparisons
  65. def test_comparison_with_categorical_dtype(self):
  66. # GH#12564
  67. df = DataFrame({"A": ["foo", "bar", "baz"]})
  68. exp = DataFrame({"A": [True, False, False]})
  69. res = df == "foo"
  70. tm.assert_frame_equal(res, exp)
  71. # casting to categorical shouldn't affect the result
  72. df["A"] = df["A"].astype("category")
  73. res = df == "foo"
  74. tm.assert_frame_equal(res, exp)
  75. def test_frame_in_list(self):
  76. # GH#12689 this should raise at the DataFrame level, not blocks
  77. df = DataFrame(
  78. np.random.default_rng(2).standard_normal((6, 4)), columns=list("ABCD")
  79. )
  80. msg = "The truth value of a DataFrame is ambiguous"
  81. with pytest.raises(ValueError, match=msg):
  82. df in [None]
  83. @pytest.mark.parametrize(
  84. "arg, arg2",
  85. [
  86. [
  87. {
  88. "a": np.random.default_rng(2).integers(10, size=10),
  89. "b": pd.date_range("20010101", periods=10),
  90. },
  91. {
  92. "a": np.random.default_rng(2).integers(10, size=10),
  93. "b": np.random.default_rng(2).integers(10, size=10),
  94. },
  95. ],
  96. [
  97. {
  98. "a": np.random.default_rng(2).integers(10, size=10),
  99. "b": np.random.default_rng(2).integers(10, size=10),
  100. },
  101. {
  102. "a": np.random.default_rng(2).integers(10, size=10),
  103. "b": pd.date_range("20010101", periods=10),
  104. },
  105. ],
  106. [
  107. {
  108. "a": pd.date_range("20010101", periods=10),
  109. "b": pd.date_range("20010101", periods=10),
  110. },
  111. {
  112. "a": np.random.default_rng(2).integers(10, size=10),
  113. "b": np.random.default_rng(2).integers(10, size=10),
  114. },
  115. ],
  116. [
  117. {
  118. "a": np.random.default_rng(2).integers(10, size=10),
  119. "b": pd.date_range("20010101", periods=10),
  120. },
  121. {
  122. "a": pd.date_range("20010101", periods=10),
  123. "b": pd.date_range("20010101", periods=10),
  124. },
  125. ],
  126. ],
  127. )
  128. def test_comparison_invalid(self, arg, arg2):
  129. # GH4968
  130. # invalid date/int comparisons
  131. x = DataFrame(arg)
  132. y = DataFrame(arg2)
  133. # we expect the result to match Series comparisons for
  134. # == and !=, inequalities should raise
  135. result = x == y
  136. expected = DataFrame(
  137. {col: x[col] == y[col] for col in x.columns},
  138. index=x.index,
  139. columns=x.columns,
  140. )
  141. tm.assert_frame_equal(result, expected)
  142. result = x != y
  143. expected = DataFrame(
  144. {col: x[col] != y[col] for col in x.columns},
  145. index=x.index,
  146. columns=x.columns,
  147. )
  148. tm.assert_frame_equal(result, expected)
  149. msgs = [
  150. r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
  151. "invalid type promotion",
  152. (
  153. # npdev 1.20.0
  154. r"The DTypes <class 'numpy.dtype\[.*\]'> and "
  155. r"<class 'numpy.dtype\[.*\]'> do not have a common DType."
  156. ),
  157. ]
  158. msg = "|".join(msgs)
  159. with pytest.raises(TypeError, match=msg):
  160. x >= y
  161. with pytest.raises(TypeError, match=msg):
  162. x > y
  163. with pytest.raises(TypeError, match=msg):
  164. x < y
  165. with pytest.raises(TypeError, match=msg):
  166. x <= y
  167. @pytest.mark.parametrize(
  168. "left, right",
  169. [
  170. ("gt", "lt"),
  171. ("lt", "gt"),
  172. ("ge", "le"),
  173. ("le", "ge"),
  174. ("eq", "eq"),
  175. ("ne", "ne"),
  176. ],
  177. )
  178. def test_timestamp_compare(self, left, right):
  179. # make sure we can compare Timestamps on the right AND left hand side
  180. # GH#4982
  181. df = DataFrame(
  182. {
  183. "dates1": pd.date_range("20010101", periods=10),
  184. "dates2": pd.date_range("20010102", periods=10),
  185. "intcol": np.random.default_rng(2).integers(1000000000, size=10),
  186. "floatcol": np.random.default_rng(2).standard_normal(10),
  187. "stringcol": [chr(100 + i) for i in range(10)],
  188. }
  189. )
  190. df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT
  191. left_f = getattr(operator, left)
  192. right_f = getattr(operator, right)
  193. # no nats
  194. if left in ["eq", "ne"]:
  195. expected = left_f(df, pd.Timestamp("20010109"))
  196. result = right_f(pd.Timestamp("20010109"), df)
  197. tm.assert_frame_equal(result, expected)
  198. else:
  199. msg = (
  200. "'(<|>)=?' not supported between "
  201. "instances of 'numpy.ndarray' and 'Timestamp'"
  202. )
  203. with pytest.raises(TypeError, match=msg):
  204. left_f(df, pd.Timestamp("20010109"))
  205. with pytest.raises(TypeError, match=msg):
  206. right_f(pd.Timestamp("20010109"), df)
  207. # nats
  208. if left in ["eq", "ne"]:
  209. expected = left_f(df, pd.Timestamp("nat"))
  210. result = right_f(pd.Timestamp("nat"), df)
  211. tm.assert_frame_equal(result, expected)
  212. else:
  213. msg = (
  214. "'(<|>)=?' not supported between "
  215. "instances of 'numpy.ndarray' and 'NaTType'"
  216. )
  217. with pytest.raises(TypeError, match=msg):
  218. left_f(df, pd.Timestamp("nat"))
  219. with pytest.raises(TypeError, match=msg):
  220. right_f(pd.Timestamp("nat"), df)
  221. def test_mixed_comparison(self):
  222. # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
  223. # not raise TypeError
  224. # (this appears to be fixed before GH#22163, not sure when)
  225. df = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]])
  226. other = DataFrame([["a", "b"], ["c", "d"]])
  227. result = df == other
  228. assert not result.any().any()
  229. result = df != other
  230. assert result.all().all()
  231. def test_df_boolean_comparison_error(self):
  232. # GH#4576, GH#22880
  233. # comparing DataFrame against list/tuple with len(obj) matching
  234. # len(df.columns) is supported as of GH#22800
  235. df = DataFrame(np.arange(6).reshape((3, 2)))
  236. expected = DataFrame([[False, False], [True, False], [False, False]])
  237. result = df == (2, 2)
  238. tm.assert_frame_equal(result, expected)
  239. result = df == [2, 2]
  240. tm.assert_frame_equal(result, expected)
  241. def test_df_float_none_comparison(self):
  242. df = DataFrame(
  243. np.random.default_rng(2).standard_normal((8, 3)),
  244. index=range(8),
  245. columns=["A", "B", "C"],
  246. )
  247. result = df.__eq__(None)
  248. assert not result.any().any()
  249. def test_df_string_comparison(self):
  250. df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
  251. mask_a = df.a > 1
  252. tm.assert_frame_equal(df[mask_a], df.loc[1:1, :])
  253. tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :])
  254. mask_b = df.b == "foo"
  255. tm.assert_frame_equal(df[mask_b], df.loc[0:0, :])
  256. tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :])
  257. class TestFrameFlexComparisons:
  258. # TODO: test_bool_flex_frame needs a better name
  259. @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"])
  260. def test_bool_flex_frame(self, op):
  261. data = np.random.default_rng(2).standard_normal((5, 3))
  262. other_data = np.random.default_rng(2).standard_normal((5, 3))
  263. df = DataFrame(data)
  264. other = DataFrame(other_data)
  265. ndim_5 = np.ones(df.shape + (1, 3))
  266. # DataFrame
  267. assert df.eq(df).values.all()
  268. assert not df.ne(df).values.any()
  269. f = getattr(df, op)
  270. o = getattr(operator, op)
  271. # No NAs
  272. tm.assert_frame_equal(f(other), o(df, other))
  273. # Unaligned
  274. part_o = other.loc[3:, 1:].copy()
  275. rs = f(part_o)
  276. xp = o(df, part_o.reindex(index=df.index, columns=df.columns))
  277. tm.assert_frame_equal(rs, xp)
  278. # ndarray
  279. tm.assert_frame_equal(f(other.values), o(df, other.values))
  280. # scalar
  281. tm.assert_frame_equal(f(0), o(df, 0))
  282. # NAs
  283. msg = "Unable to coerce to Series/DataFrame"
  284. tm.assert_frame_equal(f(np.nan), o(df, np.nan))
  285. with pytest.raises(ValueError, match=msg):
  286. f(ndim_5)
  287. @pytest.mark.parametrize("box", [np.array, Series])
  288. def test_bool_flex_series(self, box):
  289. # Series
  290. # list/tuple
  291. data = np.random.default_rng(2).standard_normal((5, 3))
  292. df = DataFrame(data)
  293. idx_ser = box(np.random.default_rng(2).standard_normal(5))
  294. col_ser = box(np.random.default_rng(2).standard_normal(3))
  295. idx_eq = df.eq(idx_ser, axis=0)
  296. col_eq = df.eq(col_ser)
  297. idx_ne = df.ne(idx_ser, axis=0)
  298. col_ne = df.ne(col_ser)
  299. tm.assert_frame_equal(col_eq, df == Series(col_ser))
  300. tm.assert_frame_equal(col_eq, -col_ne)
  301. tm.assert_frame_equal(idx_eq, -idx_ne)
  302. tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T)
  303. tm.assert_frame_equal(col_eq, df.eq(list(col_ser)))
  304. tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0))
  305. tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0))
  306. idx_gt = df.gt(idx_ser, axis=0)
  307. col_gt = df.gt(col_ser)
  308. idx_le = df.le(idx_ser, axis=0)
  309. col_le = df.le(col_ser)
  310. tm.assert_frame_equal(col_gt, df > Series(col_ser))
  311. tm.assert_frame_equal(col_gt, -col_le)
  312. tm.assert_frame_equal(idx_gt, -idx_le)
  313. tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T)
  314. idx_ge = df.ge(idx_ser, axis=0)
  315. col_ge = df.ge(col_ser)
  316. idx_lt = df.lt(idx_ser, axis=0)
  317. col_lt = df.lt(col_ser)
  318. tm.assert_frame_equal(col_ge, df >= Series(col_ser))
  319. tm.assert_frame_equal(col_ge, -col_lt)
  320. tm.assert_frame_equal(idx_ge, -idx_lt)
  321. tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T)
  322. idx_ser = Series(np.random.default_rng(2).standard_normal(5))
  323. col_ser = Series(np.random.default_rng(2).standard_normal(3))
  324. def test_bool_flex_frame_na(self):
  325. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  326. # NA
  327. df.loc[0, 0] = np.nan
  328. rs = df.eq(df)
  329. assert not rs.loc[0, 0]
  330. rs = df.ne(df)
  331. assert rs.loc[0, 0]
  332. rs = df.gt(df)
  333. assert not rs.loc[0, 0]
  334. rs = df.lt(df)
  335. assert not rs.loc[0, 0]
  336. rs = df.ge(df)
  337. assert not rs.loc[0, 0]
  338. rs = df.le(df)
  339. assert not rs.loc[0, 0]
  340. def test_bool_flex_frame_complex_dtype(self):
  341. # complex
  342. arr = np.array([np.nan, 1, 6, np.nan])
  343. arr2 = np.array([2j, np.nan, 7, None])
  344. df = DataFrame({"a": arr})
  345. df2 = DataFrame({"a": arr2})
  346. msg = "|".join(
  347. [
  348. "'>' not supported between instances of '.*' and 'complex'",
  349. r"unorderable types: .*complex\(\)", # PY35
  350. ]
  351. )
  352. with pytest.raises(TypeError, match=msg):
  353. # inequalities are not well-defined for complex numbers
  354. df.gt(df2)
  355. with pytest.raises(TypeError, match=msg):
  356. # regression test that we get the same behavior for Series
  357. df["a"].gt(df2["a"])
  358. with pytest.raises(TypeError, match=msg):
  359. # Check that we match numpy behavior here
  360. df.values > df2.values
  361. rs = df.ne(df2)
  362. assert rs.values.all()
  363. arr3 = np.array([2j, np.nan, None])
  364. df3 = DataFrame({"a": arr3})
  365. with pytest.raises(TypeError, match=msg):
  366. # inequalities are not well-defined for complex numbers
  367. df3.gt(2j)
  368. with pytest.raises(TypeError, match=msg):
  369. # regression test that we get the same behavior for Series
  370. df3["a"].gt(2j)
  371. with pytest.raises(TypeError, match=msg):
  372. # Check that we match numpy behavior here
  373. df3.values > 2j
  374. def test_bool_flex_frame_object_dtype(self):
  375. # corner, dtype=object
  376. df1 = DataFrame({"col": ["foo", np.nan, "bar"]}, dtype=object)
  377. df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}, dtype=object)
  378. result = df1.ne(df2)
  379. exp = DataFrame({"col": [False, True, False]})
  380. tm.assert_frame_equal(result, exp)
  381. def test_flex_comparison_nat(self):
  382. # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT,
  383. # and _definitely_ not be NaN
  384. df = DataFrame([pd.NaT])
  385. result = df == pd.NaT
  386. # result.iloc[0, 0] is a np.bool_ object
  387. assert result.iloc[0, 0].item() is False
  388. result = df.eq(pd.NaT)
  389. assert result.iloc[0, 0].item() is False
  390. result = df != pd.NaT
  391. assert result.iloc[0, 0].item() is True
  392. result = df.ne(pd.NaT)
  393. assert result.iloc[0, 0].item() is True
  394. @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
  395. def test_df_flex_cmp_constant_return_types(self, opname):
  396. # GH 15077, non-empty DataFrame
  397. df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
  398. const = 2
  399. result = getattr(df, opname)(const).dtypes.value_counts()
  400. tm.assert_series_equal(
  401. result, Series([2], index=[np.dtype(bool)], name="count")
  402. )
  403. @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
  404. def test_df_flex_cmp_constant_return_types_empty(self, opname):
  405. # GH 15077 empty DataFrame
  406. df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
  407. const = 2
  408. empty = df.iloc[:0]
  409. result = getattr(empty, opname)(const).dtypes.value_counts()
  410. tm.assert_series_equal(
  411. result, Series([2], index=[np.dtype(bool)], name="count")
  412. )
  413. def test_df_flex_cmp_ea_dtype_with_ndarray_series(self):
  414. ii = pd.IntervalIndex.from_breaks([1, 2, 3])
  415. df = DataFrame({"A": ii, "B": ii})
  416. ser = Series([0, 0])
  417. res = df.eq(ser, axis=0)
  418. expected = DataFrame({"A": [False, False], "B": [False, False]})
  419. tm.assert_frame_equal(res, expected)
  420. ser2 = Series([1, 2], index=["A", "B"])
  421. res2 = df.eq(ser2, axis=1)
  422. tm.assert_frame_equal(res2, expected)
# -------------------------------------------------------------------
# Arithmetic
  425. class TestFrameFlexArithmetic:
  426. def test_floordiv_axis0(self):
  427. # make sure we df.floordiv(ser, axis=0) matches column-wise result
  428. arr = np.arange(3)
  429. ser = Series(arr)
  430. df = DataFrame({"A": ser, "B": ser})
  431. result = df.floordiv(ser, axis=0)
  432. expected = DataFrame({col: df[col] // ser for col in df.columns})
  433. tm.assert_frame_equal(result, expected)
  434. result2 = df.floordiv(ser.values, axis=0)
  435. tm.assert_frame_equal(result2, expected)
  436. def test_df_add_td64_columnwise(self):
  437. # GH 22534 Check that column-wise addition broadcasts correctly
  438. dti = pd.date_range("2016-01-01", periods=10)
  439. tdi = pd.timedelta_range("1", periods=10)
  440. tser = Series(tdi)
  441. df = DataFrame({0: dti, 1: tdi})
  442. result = df.add(tser, axis=0)
  443. expected = DataFrame({0: dti + tdi, 1: tdi + tdi})
  444. tm.assert_frame_equal(result, expected)
  445. def test_df_add_flex_filled_mixed_dtypes(self):
  446. # GH 19611
  447. dti = pd.date_range("2016-01-01", periods=3)
  448. ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]")
  449. df = DataFrame({"A": dti, "B": ser})
  450. other = DataFrame({"A": ser, "B": ser})
  451. fill = pd.Timedelta(days=1).to_timedelta64()
  452. result = df.add(other, fill_value=fill)
  453. expected = DataFrame(
  454. {
  455. "A": Series(
  456. ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]"
  457. ),
  458. "B": ser * 2,
  459. }
  460. )
  461. tm.assert_frame_equal(result, expected)
  462. def test_arith_flex_frame(
  463. self, all_arithmetic_operators, float_frame, mixed_float_frame
  464. ):
  465. # one instance of parametrized fixture
  466. op = all_arithmetic_operators
  467. def f(x, y):
  468. # r-versions not in operator-stdlib; get op without "r" and invert
  469. if op.startswith("__r"):
  470. return getattr(operator, op.replace("__r", "__"))(y, x)
  471. return getattr(operator, op)(x, y)
  472. result = getattr(float_frame, op)(2 * float_frame)
  473. expected = f(float_frame, 2 * float_frame)
  474. tm.assert_frame_equal(result, expected)
  475. # vs mix float
  476. result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
  477. expected = f(mixed_float_frame, 2 * mixed_float_frame)
  478. tm.assert_frame_equal(result, expected)
  479. _check_mixed_float(result, dtype={"C": None})
  480. @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"])
  481. def test_arith_flex_frame_mixed(
  482. self,
  483. op,
  484. int_frame,
  485. mixed_int_frame,
  486. mixed_float_frame,
  487. switch_numexpr_min_elements,
  488. ):
  489. f = getattr(operator, op)
  490. # vs mix int
  491. result = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
  492. expected = f(mixed_int_frame, 2 + mixed_int_frame)
  493. # no overflow in the uint
  494. dtype = None
  495. if op in ["__sub__"]:
  496. dtype = {"B": "uint64", "C": None}
  497. elif op in ["__add__", "__mul__"]:
  498. dtype = {"C": None}
  499. if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
  500. # when using numexpr, the casting rules are slightly different:
  501. # in the `2 + mixed_int_frame` operation, int32 column becomes
  502. # and int64 column (not preserving dtype in operation with Python
  503. # scalar), and then the int32/int64 combo results in int64 result
  504. dtype["A"] = (2 + mixed_int_frame)["A"].dtype
  505. tm.assert_frame_equal(result, expected)
  506. _check_mixed_int(result, dtype=dtype)
  507. # vs mix float
  508. result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
  509. expected = f(mixed_float_frame, 2 * mixed_float_frame)
  510. tm.assert_frame_equal(result, expected)
  511. _check_mixed_float(result, dtype={"C": None})
  512. # vs plain int
  513. result = getattr(int_frame, op)(2 * int_frame)
  514. expected = f(int_frame, 2 * int_frame)
  515. tm.assert_frame_equal(result, expected)
  516. @pytest.mark.parametrize("dim", range(3, 6))
  517. def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame, dim):
  518. # one instance of parametrized fixture
  519. op = all_arithmetic_operators
  520. # Check that arrays with dim >= 3 raise
  521. arr = np.ones((1,) * dim)
  522. msg = "Unable to coerce to Series/DataFrame"
  523. with pytest.raises(ValueError, match=msg):
  524. getattr(float_frame, op)(arr)
  525. def test_arith_flex_frame_corner(self, float_frame):
  526. const_add = float_frame.add(1)
  527. tm.assert_frame_equal(const_add, float_frame + 1)
  528. # corner cases
  529. result = float_frame.add(float_frame[:0])
  530. expected = float_frame.sort_index() * np.nan
  531. tm.assert_frame_equal(result, expected)
  532. result = float_frame[:0].add(float_frame)
  533. expected = float_frame.sort_index() * np.nan
  534. tm.assert_frame_equal(result, expected)
  535. with pytest.raises(NotImplementedError, match="fill_value"):
  536. float_frame.add(float_frame.iloc[0], fill_value=3)
  537. with pytest.raises(NotImplementedError, match="fill_value"):
  538. float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)
  539. @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
  540. def test_arith_flex_series_ops(self, simple_frame, op):
  541. # after arithmetic refactor, add truediv here
  542. df = simple_frame
  543. row = df.xs("a")
  544. col = df["two"]
  545. f = getattr(df, op)
  546. op = getattr(operator, op)
  547. tm.assert_frame_equal(f(row), op(df, row))
  548. tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T)
  549. def test_arith_flex_series(self, simple_frame):
  550. df = simple_frame
  551. row = df.xs("a")
  552. col = df["two"]
  553. # special case for some reason
  554. tm.assert_frame_equal(df.add(row, axis=None), df + row)
  555. # cases which will be refactored after big arithmetic refactor
  556. tm.assert_frame_equal(df.div(row), df / row)
  557. tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T)
  558. @pytest.mark.parametrize("dtype", ["int64", "float64"])
  559. def test_arith_flex_series_broadcasting(self, dtype):
  560. # broadcasting issue in GH 7325
  561. df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype)
  562. expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]])
  563. result = df.div(df[0], axis="index")
  564. tm.assert_frame_equal(result, expected)
  565. def test_arith_flex_zero_len_raises(self):
  566. # GH 19522 passing fill_value to frame flex arith methods should
  567. # raise even in the zero-length special cases
  568. ser_len0 = Series([], dtype=object)
  569. df_len0 = DataFrame(columns=["A", "B"])
  570. df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
  571. with pytest.raises(NotImplementedError, match="fill_value"):
  572. df.add(ser_len0, fill_value="E")
  573. with pytest.raises(NotImplementedError, match="fill_value"):
  574. df_len0.sub(df["A"], axis=None, fill_value=3)
  575. def test_flex_add_scalar_fill_value(self):
  576. # GH#12723
  577. dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float")
  578. df = DataFrame({"foo": dat}, index=range(6))
  579. exp = df.fillna(0).add(2)
  580. res = df.add(2, fill_value=0)
  581. tm.assert_frame_equal(res, exp)
  582. def test_sub_alignment_with_duplicate_index(self):
  583. # GH#5185 dup aligning operations should work
  584. df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
  585. df2 = DataFrame([1, 2, 3], index=[1, 2, 3])
  586. expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
  587. result = df1.sub(df2)
  588. tm.assert_frame_equal(result, expected)
  589. @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"])
  590. def test_arithmetic_with_duplicate_columns(self, op):
  591. # operations
  592. df = DataFrame({"A": np.arange(10), "B": np.random.default_rng(2).random(10)})
  593. expected = getattr(df, op)(df)
  594. expected.columns = ["A", "A"]
  595. df.columns = ["A", "A"]
  596. result = getattr(df, op)(df)
  597. tm.assert_frame_equal(result, expected)
  598. @pytest.mark.parametrize("level", [0, None])
  599. def test_broadcast_multiindex(self, level):
  600. # GH34388
  601. df1 = DataFrame({"A": [0, 1, 2], "B": [1, 2, 3]})
  602. df1.columns = df1.columns.set_names("L1")
  603. df2 = DataFrame({("A", "C"): [0, 0, 0], ("A", "D"): [0, 0, 0]})
  604. df2.columns = df2.columns.set_names(["L1", "L2"])
  605. result = df1.add(df2, level=level)
  606. expected = DataFrame({("A", "C"): [0, 1, 2], ("A", "D"): [0, 1, 2]})
  607. expected.columns = expected.columns.set_names(["L1", "L2"])
  608. tm.assert_frame_equal(result, expected)
  609. def test_frame_multiindex_operations(self):
  610. # GH 43321
  611. df = DataFrame(
  612. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  613. index=MultiIndex.from_product(
  614. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  615. ),
  616. )
  617. series = Series(
  618. [0.4],
  619. index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
  620. )
  621. expected = DataFrame(
  622. {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
  623. index=MultiIndex.from_product(
  624. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  625. ),
  626. )
  627. result = df.add(series, axis=0)
  628. tm.assert_frame_equal(result, expected)
  629. def test_frame_multiindex_operations_series_index_to_frame_index(self):
  630. # GH 43321
  631. df = DataFrame(
  632. {2010: [1], 2020: [3]},
  633. index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
  634. )
  635. series = Series(
  636. [10.0, 20.0, 30.0],
  637. index=MultiIndex.from_product(
  638. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  639. ),
  640. )
  641. expected = DataFrame(
  642. {2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]},
  643. index=MultiIndex.from_product(
  644. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  645. ),
  646. )
  647. result = df.add(series, axis=0)
  648. tm.assert_frame_equal(result, expected)
  649. def test_frame_multiindex_operations_no_align(self):
  650. df = DataFrame(
  651. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  652. index=MultiIndex.from_product(
  653. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  654. ),
  655. )
  656. series = Series(
  657. [0.4],
  658. index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
  659. )
  660. expected = DataFrame(
  661. {2010: np.nan, 2020: np.nan},
  662. index=MultiIndex.from_tuples(
  663. [
  664. ("a", "b", 0),
  665. ("a", "b", 1),
  666. ("a", "b", 2),
  667. ("a", "c", np.nan),
  668. ],
  669. names=["scen", "mod", "id"],
  670. ),
  671. )
  672. result = df.add(series, axis=0)
  673. tm.assert_frame_equal(result, expected)
  674. def test_frame_multiindex_operations_part_align(self):
  675. df = DataFrame(
  676. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  677. index=MultiIndex.from_tuples(
  678. [
  679. ("a", "b", 0),
  680. ("a", "b", 1),
  681. ("a", "c", 2),
  682. ],
  683. names=["scen", "mod", "id"],
  684. ),
  685. )
  686. series = Series(
  687. [0.4],
  688. index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
  689. )
  690. expected = DataFrame(
  691. {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
  692. index=MultiIndex.from_tuples(
  693. [
  694. ("a", "b", 0),
  695. ("a", "b", 1),
  696. ("a", "c", 2),
  697. ],
  698. names=["scen", "mod", "id"],
  699. ),
  700. )
  701. result = df.add(series, axis=0)
  702. tm.assert_frame_equal(result, expected)
  703. class TestFrameArithmetic:
  704. def test_td64_op_nat_casting(self):
  705. # Make sure we don't accidentally treat timedelta64(NaT) as datetime64
  706. # when calling dispatch_to_series in DataFrame arithmetic
  707. ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]")
  708. df = DataFrame([[1, 2], [3, 4]])
  709. result = df * ser
  710. expected = DataFrame({0: ser, 1: ser})
  711. tm.assert_frame_equal(result, expected)
  712. def test_df_add_2d_array_rowlike_broadcasts(self):
  713. # GH#23000
  714. arr = np.arange(6).reshape(3, 2)
  715. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  716. rowlike = arr[[1], :] # shape --> (1, ncols)
  717. assert rowlike.shape == (1, df.shape[1])
  718. expected = DataFrame(
  719. [[2, 4], [4, 6], [6, 8]],
  720. columns=df.columns,
  721. index=df.index,
  722. # specify dtype explicitly to avoid failing
  723. # on 32bit builds
  724. dtype=arr.dtype,
  725. )
  726. result = df + rowlike
  727. tm.assert_frame_equal(result, expected)
  728. result = rowlike + df
  729. tm.assert_frame_equal(result, expected)
  730. def test_df_add_2d_array_collike_broadcasts(self):
  731. # GH#23000
  732. arr = np.arange(6).reshape(3, 2)
  733. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  734. collike = arr[:, [1]] # shape --> (nrows, 1)
  735. assert collike.shape == (df.shape[0], 1)
  736. expected = DataFrame(
  737. [[1, 2], [5, 6], [9, 10]],
  738. columns=df.columns,
  739. index=df.index,
  740. # specify dtype explicitly to avoid failing
  741. # on 32bit builds
  742. dtype=arr.dtype,
  743. )
  744. result = df + collike
  745. tm.assert_frame_equal(result, expected)
  746. result = collike + df
  747. tm.assert_frame_equal(result, expected)
  748. def test_df_arith_2d_array_rowlike_broadcasts(
  749. self, request, all_arithmetic_operators, using_array_manager
  750. ):
  751. # GH#23000
  752. opname = all_arithmetic_operators
  753. if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
  754. # TODO(ArrayManager) decide on dtypes
  755. td.mark_array_manager_not_yet_implemented(request)
  756. arr = np.arange(6).reshape(3, 2)
  757. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  758. rowlike = arr[[1], :] # shape --> (1, ncols)
  759. assert rowlike.shape == (1, df.shape[1])
  760. exvals = [
  761. getattr(df.loc["A"], opname)(rowlike.squeeze()),
  762. getattr(df.loc["B"], opname)(rowlike.squeeze()),
  763. getattr(df.loc["C"], opname)(rowlike.squeeze()),
  764. ]
  765. expected = DataFrame(exvals, columns=df.columns, index=df.index)
  766. result = getattr(df, opname)(rowlike)
  767. tm.assert_frame_equal(result, expected)
  768. def test_df_arith_2d_array_collike_broadcasts(
  769. self, request, all_arithmetic_operators, using_array_manager
  770. ):
  771. # GH#23000
  772. opname = all_arithmetic_operators
  773. if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
  774. # TODO(ArrayManager) decide on dtypes
  775. td.mark_array_manager_not_yet_implemented(request)
  776. arr = np.arange(6).reshape(3, 2)
  777. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  778. collike = arr[:, [1]] # shape --> (nrows, 1)
  779. assert collike.shape == (df.shape[0], 1)
  780. exvals = {
  781. True: getattr(df[True], opname)(collike.squeeze()),
  782. False: getattr(df[False], opname)(collike.squeeze()),
  783. }
  784. dtype = None
  785. if opname in ["__rmod__", "__rfloordiv__"]:
  786. # Series ops may return mixed int/float dtypes in cases where
  787. # DataFrame op will return all-float. So we upcast `expected`
  788. dtype = np.common_type(*(x.values for x in exvals.values()))
  789. expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype)
  790. result = getattr(df, opname)(collike)
  791. tm.assert_frame_equal(result, expected)
  792. def test_df_bool_mul_int(self):
  793. # GH 22047, GH 22163 multiplication by 1 should result in int dtype,
  794. # not object dtype
  795. df = DataFrame([[False, True], [False, False]])
  796. result = df * 1
  797. # On appveyor this comes back as np.int32 instead of np.int64,
  798. # so we check dtype.kind instead of just dtype
  799. kinds = result.dtypes.apply(lambda x: x.kind)
  800. assert (kinds == "i").all()
  801. result = 1 * df
  802. kinds = result.dtypes.apply(lambda x: x.kind)
  803. assert (kinds == "i").all()
  804. def test_arith_mixed(self):
  805. left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]})
  806. result = left + left
  807. expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]})
  808. tm.assert_frame_equal(result, expected)
  809. @pytest.mark.parametrize("col", ["A", "B"])
  810. def test_arith_getitem_commute(self, all_arithmetic_functions, col):
  811. df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]})
  812. result = all_arithmetic_functions(df, 1)[col]
  813. expected = all_arithmetic_functions(df[col], 1)
  814. tm.assert_series_equal(result, expected)
  815. @pytest.mark.parametrize(
  816. "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])]
  817. )
  818. def test_arith_alignment_non_pandas_object(self, values):
  819. # GH#17901
  820. df = DataFrame({"A": [1, 1], "B": [1, 1]})
  821. expected = DataFrame({"A": [2, 2], "B": [3, 3]})
  822. result = df + values
  823. tm.assert_frame_equal(result, expected)
  824. def test_arith_non_pandas_object(self):
  825. df = DataFrame(
  826. np.arange(1, 10, dtype="f8").reshape(3, 3),
  827. columns=["one", "two", "three"],
  828. index=["a", "b", "c"],
  829. )
  830. val1 = df.xs("a").values
  831. added = DataFrame(df.values + val1, index=df.index, columns=df.columns)
  832. tm.assert_frame_equal(df + val1, added)
  833. added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns)
  834. tm.assert_frame_equal(df.add(val1, axis=0), added)
  835. val2 = list(df["two"])
  836. added = DataFrame(df.values + val2, index=df.index, columns=df.columns)
  837. tm.assert_frame_equal(df + val2, added)
  838. added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns)
  839. tm.assert_frame_equal(df.add(val2, axis="index"), added)
  840. val3 = np.random.default_rng(2).random(df.shape)
  841. added = DataFrame(df.values + val3, index=df.index, columns=df.columns)
  842. tm.assert_frame_equal(df.add(val3), added)
  843. def test_operations_with_interval_categories_index(self, all_arithmetic_operators):
  844. # GH#27415
  845. op = all_arithmetic_operators
  846. ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0))
  847. data = [1, 2]
  848. df = DataFrame([data], columns=ind)
  849. num = 10
  850. result = getattr(df, op)(num)
  851. expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
  852. tm.assert_frame_equal(result, expected)
  853. def test_frame_with_frame_reindex(self):
  854. # GH#31623
  855. df = DataFrame(
  856. {
  857. "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
  858. "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
  859. },
  860. columns=["foo", "bar"],
  861. dtype="M8[ns]",
  862. )
  863. df2 = df[["foo"]]
  864. result = df - df2
  865. expected = DataFrame(
  866. {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
  867. columns=["bar", "foo"],
  868. )
  869. tm.assert_frame_equal(result, expected)
    @pytest.mark.parametrize(
        "value, dtype",
        [
            (1, "i8"),
            (1.0, "f8"),
            (2**63, "f8"),
            (1j, "complex128"),
            (2**63, "complex128"),
            (True, "bool"),
            (np.timedelta64(20, "ns"), "<m8[ns]"),
            (np.datetime64(20, "ns"), "<M8[ns]"),
        ],
    )
    @pytest.mark.parametrize(
        "op",
        [
            operator.add,
            operator.sub,
            operator.mul,
            operator.truediv,
            operator.mod,
            operator.pow,
        ],
        ids=lambda x: x.__name__,
    )
    def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements):
        """
        Check ``op(df, elem.value)`` for every (op, dtype) combination.

        Each pair falls into one of three groups:

        * ``invalid`` -- the operation must raise ``TypeError``;
        * ``skip`` -- bool add/mul must run (possibly with a ``UserWarning``),
          bool truediv/pow must raise ``NotImplementedError``;
        * everything else -- the resulting dtypes must equal those obtained
          when operating with the raw ``value``, and no warning is emitted.
        """
        # (op, dtype) pairs on bool data that get dedicated handling below.
        skip = {
            (operator.truediv, "bool"),
            (operator.pow, "bool"),
            (operator.add, "bool"),
            (operator.mul, "bool"),
        }

        # NOTE(review): DummyElement is defined outside this chunk; only its
        # ``.value`` and ``.dtype`` attributes are used here.
        elem = DummyElement(value, dtype)
        df = DataFrame({"A": [elem.value, elem.value]}, dtype=elem.dtype)

        # (op, dtype) pairs that are expected to raise TypeError.
        invalid = {
            (operator.pow, "<M8[ns]"),
            (operator.mod, "<M8[ns]"),
            (operator.truediv, "<M8[ns]"),
            (operator.mul, "<M8[ns]"),
            (operator.add, "<M8[ns]"),
            (operator.pow, "<m8[ns]"),
            (operator.mul, "<m8[ns]"),
            (operator.sub, "bool"),
            (operator.mod, "complex128"),
        }

        if (op, dtype) in invalid:
            warn = None
            if (dtype == "<M8[ns]" and op == operator.add) or (
                dtype == "<m8[ns]" and op == operator.mul
            ):
                # match=None: only the exception type is checked.
                msg = None
            elif dtype == "complex128":
                msg = "ufunc 'remainder' not supported for the input types"
            elif op is operator.sub:
                msg = "numpy boolean subtract, the `-` operator, is "
                if (
                    dtype == "bool"
                    and expr.USE_NUMEXPR
                    and switch_numexpr_min_elements == 0
                ):
                    warn = UserWarning  # "evaluating in Python space because ..."
            else:
                msg = (
                    f"cannot perform __{op.__name__}__ with this "
                    "index type: (DatetimeArray|TimedeltaArray)"
                )

            with pytest.raises(TypeError, match=msg):
                with tm.assert_produces_warning(warn):
                    op(df, elem.value)

        elif (op, dtype) in skip:
            if op in [operator.add, operator.mul]:
                # bool add/mul succeeds; a warning is expected only when
                # numexpr is in use with a zero min-elements threshold.
                if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
                    # "evaluating in Python space because ..."
                    warn = UserWarning
                else:
                    warn = None
                with tm.assert_produces_warning(warn):
                    op(df, elem.value)

            else:
                msg = "operator '.*' not implemented for .* dtypes"
                with pytest.raises(NotImplementedError, match=msg):
                    op(df, elem.value)

        else:
            # Valid combination: must not warn, and the dtypes must match
            # what the raw scalar produces.
            with tm.assert_produces_warning(None):
                result = op(df, elem.value).dtypes
                expected = op(df, value).dtypes
            tm.assert_series_equal(result, expected)
  957. def test_arithmetic_midx_cols_different_dtypes(self):
  958. # GH#49769
  959. midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
  960. midx2 = MultiIndex.from_arrays([Series([1, 2], dtype="Int8"), Series([3, 4])])
  961. left = DataFrame([[1, 2], [3, 4]], columns=midx)
  962. right = DataFrame([[1, 2], [3, 4]], columns=midx2)
  963. result = left - right
  964. expected = DataFrame([[0, 0], [0, 0]], columns=midx)
  965. tm.assert_frame_equal(result, expected)
  966. def test_arithmetic_midx_cols_different_dtypes_different_order(self):
  967. # GH#49769
  968. midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
  969. midx2 = MultiIndex.from_arrays([Series([2, 1], dtype="Int8"), Series([4, 3])])
  970. left = DataFrame([[1, 2], [3, 4]], columns=midx)
  971. right = DataFrame([[1, 2], [3, 4]], columns=midx2)
  972. result = left - right
  973. expected = DataFrame([[-1, 1], [-1, 1]], columns=midx)
  974. tm.assert_frame_equal(result, expected)
  975. def test_frame_with_zero_len_series_corner_cases():
  976. # GH#28600
  977. # easy all-float case
  978. df = DataFrame(
  979. np.random.default_rng(2).standard_normal(6).reshape(3, 2), columns=["A", "B"]
  980. )
  981. ser = Series(dtype=np.float64)
  982. result = df + ser
  983. expected = DataFrame(df.values * np.nan, columns=df.columns)
  984. tm.assert_frame_equal(result, expected)
  985. with pytest.raises(ValueError, match="not aligned"):
  986. # Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
  987. df == ser
  988. # non-float case should not raise TypeError on comparison
  989. df2 = DataFrame(df.values.view("M8[ns]"), columns=df.columns)
  990. with pytest.raises(ValueError, match="not aligned"):
  991. # Automatic alignment for comparisons deprecated
  992. df2 == ser
  993. def test_zero_len_frame_with_series_corner_cases():
  994. # GH#28600
  995. df = DataFrame(columns=["A", "B"], dtype=np.float64)
  996. ser = Series([1, 2], index=["A", "B"])
  997. result = df + ser
  998. expected = df
  999. tm.assert_frame_equal(result, expected)
  1000. def test_frame_single_columns_object_sum_axis_1():
  1001. # GH 13758
  1002. data = {
  1003. "One": Series(["A", 1.2, np.nan]),
  1004. }
  1005. df = DataFrame(data)
  1006. result = df.sum(axis=1)
  1007. expected = Series(["A", 1.2, 0])
  1008. tm.assert_series_equal(result, expected)
  1009. # -------------------------------------------------------------------
  1010. # Unsorted
  1011. # These arithmetic tests were previously in other files, eventually
  1012. # should be parametrized and put into tests.arithmetic
  1013. class TestFrameArithmeticUnsorted:
  1014. def test_frame_add_tz_mismatch_converts_to_utc(self):
  1015. rng = pd.date_range("1/1/2011", periods=10, freq="h", tz="US/Eastern")
  1016. df = DataFrame(
  1017. np.random.default_rng(2).standard_normal(len(rng)), index=rng, columns=["a"]
  1018. )
  1019. df_moscow = df.tz_convert("Europe/Moscow")
  1020. result = df + df_moscow
  1021. assert result.index.tz is timezone.utc
  1022. result = df_moscow + df
  1023. assert result.index.tz is timezone.utc
  1024. def test_align_frame(self):
  1025. rng = pd.period_range("1/1/2000", "1/1/2010", freq="Y")
  1026. ts = DataFrame(
  1027. np.random.default_rng(2).standard_normal((len(rng), 3)), index=rng
  1028. )
  1029. result = ts + ts[::2]
  1030. expected = ts + ts
  1031. expected.iloc[1::2] = np.nan
  1032. tm.assert_frame_equal(result, expected)
  1033. half = ts[::2]
  1034. result = ts + half.take(np.random.default_rng(2).permutation(len(half)))
  1035. tm.assert_frame_equal(result, expected)
  1036. @pytest.mark.parametrize(
  1037. "op", [operator.add, operator.sub, operator.mul, operator.truediv]
  1038. )
  1039. def test_operators_none_as_na(self, op):
  1040. df = DataFrame(
  1041. {"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object
  1042. )
  1043. # since filling converts dtypes from object, changed expected to be
  1044. # object
  1045. msg = "Downcasting object dtype arrays"
  1046. with tm.assert_produces_warning(FutureWarning, match=msg):
  1047. filled = df.fillna(np.nan)
  1048. result = op(df, 3)
  1049. expected = op(filled, 3).astype(object)
  1050. expected[pd.isna(expected)] = np.nan
  1051. tm.assert_frame_equal(result, expected)
  1052. result = op(df, df)
  1053. expected = op(filled, filled).astype(object)
  1054. expected[pd.isna(expected)] = np.nan
  1055. tm.assert_frame_equal(result, expected)
  1056. msg = "Downcasting object dtype arrays"
  1057. with tm.assert_produces_warning(FutureWarning, match=msg):
  1058. result = op(df, df.fillna(7))
  1059. tm.assert_frame_equal(result, expected)
  1060. msg = "Downcasting object dtype arrays"
  1061. with tm.assert_produces_warning(FutureWarning, match=msg):
  1062. result = op(df.fillna(7), df)
  1063. tm.assert_frame_equal(result, expected)
  1064. @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
  1065. # TODO: not sure what's correct here.
  1066. @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning")
  1067. def test_logical_typeerror_with_non_valid(self, op, res, float_frame):
  1068. # we are comparing floats vs a string
  1069. result = getattr(float_frame, op)("foo")
  1070. assert bool(result.all().all()) is res
  1071. @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "truediv"])
  1072. def test_binary_ops_align(self, op):
  1073. # test aligning binary ops
  1074. # GH 6681
  1075. index = MultiIndex.from_product(
  1076. [list("abc"), ["one", "two", "three"], [1, 2, 3]],
  1077. names=["first", "second", "third"],
  1078. )
  1079. df = DataFrame(
  1080. np.arange(27 * 3).reshape(27, 3),
  1081. index=index,
  1082. columns=["value1", "value2", "value3"],
  1083. ).sort_index()
  1084. idx = pd.IndexSlice
  1085. opa = getattr(operator, op, None)
  1086. if opa is None:
  1087. return
  1088. x = Series([1.0, 10.0, 100.0], [1, 2, 3])
  1089. result = getattr(df, op)(x, level="third", axis=0)
  1090. expected = pd.concat(
  1091. [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()]
  1092. ).sort_index()
  1093. tm.assert_frame_equal(result, expected)
  1094. x = Series([1.0, 10.0], ["two", "three"])
  1095. result = getattr(df, op)(x, level="second", axis=0)
  1096. expected = (
  1097. pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()])
  1098. .reindex_like(df)
  1099. .sort_index()
  1100. )
  1101. tm.assert_frame_equal(result, expected)
  1102. def test_binary_ops_align_series_dataframe(self):
  1103. # GH9463 (alignment level of dataframe with series)
  1104. midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
  1105. df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
  1106. s = Series({"a": 1, "b": 2})
  1107. df2 = df.copy()
  1108. df2.columns.names = ["lvl0", "lvl1"]
  1109. s2 = s.copy()
  1110. s2.index.name = "lvl1"
  1111. # different cases of integer/string level names:
  1112. res1 = df.mul(s, axis=1, level=1)
  1113. res2 = df.mul(s2, axis=1, level=1)
  1114. res3 = df2.mul(s, axis=1, level=1)
  1115. res4 = df2.mul(s2, axis=1, level=1)
  1116. res5 = df2.mul(s, axis=1, level="lvl1")
  1117. res6 = df2.mul(s2, axis=1, level="lvl1")
  1118. exp = DataFrame(
  1119. np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx
  1120. )
  1121. for res in [res1, res2]:
  1122. tm.assert_frame_equal(res, exp)
  1123. exp.columns.names = ["lvl0", "lvl1"]
  1124. for res in [res3, res4, res5, res6]:
  1125. tm.assert_frame_equal(res, exp)
  1126. def test_add_with_dti_mismatched_tzs(self):
  1127. base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
  1128. idx1 = base.tz_convert("Asia/Tokyo")[:2]
  1129. idx2 = base.tz_convert("US/Eastern")[1:]
  1130. df1 = DataFrame({"A": [1, 2]}, index=idx1)
  1131. df2 = DataFrame({"A": [1, 1]}, index=idx2)
  1132. exp = DataFrame({"A": [np.nan, 3, np.nan]}, index=base)
  1133. tm.assert_frame_equal(df1 + df2, exp)
  1134. def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
  1135. frame_copy = float_frame.reindex(float_frame.index[::2])
  1136. del frame_copy["D"]
  1137. # adding NAs to first 5 values of column "C"
  1138. frame_copy.loc[: frame_copy.index[4], "C"] = np.nan
  1139. added = float_frame + frame_copy
  1140. indexer = added["A"].dropna().index
  1141. exp = (float_frame["A"] * 2).copy()
  1142. tm.assert_series_equal(added["A"].dropna(), exp.loc[indexer])
  1143. exp.loc[~exp.index.isin(indexer)] = np.nan
  1144. tm.assert_series_equal(added["A"], exp.loc[added["A"].index])
  1145. assert np.isnan(added["C"].reindex(frame_copy.index)[:5]).all()
  1146. # assert(False)
  1147. assert np.isnan(added["D"]).all()
  1148. self_added = float_frame + float_frame
  1149. tm.assert_index_equal(self_added.index, float_frame.index)
  1150. added_rev = frame_copy + float_frame
  1151. assert np.isnan(added["D"]).all()
  1152. assert np.isnan(added_rev["D"]).all()
  1153. # corner cases
  1154. # empty
  1155. plus_empty = float_frame + DataFrame()
  1156. assert np.isnan(plus_empty.values).all()
  1157. empty_plus = DataFrame() + float_frame
  1158. assert np.isnan(empty_plus.values).all()
  1159. empty_empty = DataFrame() + DataFrame()
  1160. assert empty_empty.empty
  1161. # out of order
  1162. reverse = float_frame.reindex(columns=float_frame.columns[::-1])
  1163. tm.assert_frame_equal(reverse + float_frame, float_frame * 2)
  1164. # mix vs float64, upcast
  1165. added = float_frame + mixed_float_frame
  1166. _check_mixed_float(added, dtype="float64")
  1167. added = mixed_float_frame + float_frame
  1168. _check_mixed_float(added, dtype="float64")
  1169. # mix vs mix
  1170. added = mixed_float_frame + mixed_float_frame
  1171. _check_mixed_float(added, dtype={"C": None})
  1172. # with int
  1173. added = float_frame + mixed_int_frame
  1174. _check_mixed_float(added, dtype="float64")
  1175. def test_combine_series(self, float_frame, mixed_float_frame, mixed_int_frame):
  1176. # Series
  1177. series = float_frame.xs(float_frame.index[0])
  1178. added = float_frame + series
  1179. for key, s in added.items():
  1180. tm.assert_series_equal(s, float_frame[key] + series[key])
  1181. larger_series = series.to_dict()
  1182. larger_series["E"] = 1
  1183. larger_series = Series(larger_series)
  1184. larger_added = float_frame + larger_series
  1185. for key, s in float_frame.items():
  1186. tm.assert_series_equal(larger_added[key], s + series[key])
  1187. assert "E" in larger_added
  1188. assert np.isnan(larger_added["E"]).all()
  1189. # no upcast needed
  1190. added = mixed_float_frame + series
  1191. assert np.all(added.dtypes == series.dtype)
  1192. # vs mix (upcast) as needed
  1193. added = mixed_float_frame + series.astype("float32")
  1194. _check_mixed_float(added, dtype={"C": None})
  1195. added = mixed_float_frame + series.astype("float16")
  1196. _check_mixed_float(added, dtype={"C": None})
  1197. # these used to raise with numexpr as we are adding an int64 to an
  1198. # uint64....weird vs int
  1199. added = mixed_int_frame + (100 * series).astype("int64")
  1200. _check_mixed_int(
  1201. added, dtype={"A": "int64", "B": "float64", "C": "int64", "D": "int64"}
  1202. )
  1203. added = mixed_int_frame + (100 * series).astype("int32")
  1204. _check_mixed_int(
  1205. added, dtype={"A": "int32", "B": "float64", "C": "int32", "D": "int64"}
  1206. )
  1207. def test_combine_timeseries(self, datetime_frame):
  1208. # TimeSeries
  1209. ts = datetime_frame["A"]
  1210. # 10890
  1211. # we no longer allow auto timeseries broadcasting
  1212. # and require explicit broadcasting
  1213. added = datetime_frame.add(ts, axis="index")
  1214. for key, col in datetime_frame.items():
  1215. result = col + ts
  1216. tm.assert_series_equal(added[key], result, check_names=False)
  1217. assert added[key].name == key
  1218. if col.name == ts.name:
  1219. assert result.name == "A"
  1220. else:
  1221. assert result.name is None
  1222. smaller_frame = datetime_frame[:-5]
  1223. smaller_added = smaller_frame.add(ts, axis="index")
  1224. tm.assert_index_equal(smaller_added.index, datetime_frame.index)
  1225. smaller_ts = ts[:-5]
  1226. smaller_added2 = datetime_frame.add(smaller_ts, axis="index")
  1227. tm.assert_frame_equal(smaller_added, smaller_added2)
  1228. # length 0, result is all-nan
  1229. result = datetime_frame.add(ts[:0], axis="index")
  1230. expected = DataFrame(
  1231. np.nan, index=datetime_frame.index, columns=datetime_frame.columns
  1232. )
  1233. tm.assert_frame_equal(result, expected)
  1234. # Frame is all-nan
  1235. result = datetime_frame[:0].add(ts, axis="index")
  1236. expected = DataFrame(
  1237. np.nan, index=datetime_frame.index, columns=datetime_frame.columns
  1238. )
  1239. tm.assert_frame_equal(result, expected)
  1240. # empty but with non-empty index
  1241. frame = datetime_frame[:1].reindex(columns=[])
  1242. result = frame.mul(ts, axis="index")
  1243. assert len(result) == len(ts)
  1244. def test_combineFunc(self, float_frame, mixed_float_frame):
  1245. result = float_frame * 2
  1246. tm.assert_numpy_array_equal(result.values, float_frame.values * 2)
  1247. # vs mix
  1248. result = mixed_float_frame * 2
  1249. for c, s in result.items():
  1250. tm.assert_numpy_array_equal(s.values, mixed_float_frame[c].values * 2)
  1251. _check_mixed_float(result, dtype={"C": None})
  1252. result = DataFrame() * 2
  1253. assert result.index.equals(DataFrame().index)
  1254. assert len(result.columns) == 0
  1255. @pytest.mark.parametrize(
  1256. "func",
  1257. [operator.eq, operator.ne, operator.lt, operator.gt, operator.ge, operator.le],
  1258. )
  1259. def test_comparisons(self, simple_frame, float_frame, func):
  1260. df1 = DataFrame(
  1261. np.random.default_rng(2).standard_normal((30, 4)),
  1262. columns=Index(list("ABCD"), dtype=object),
  1263. index=pd.date_range("2000-01-01", periods=30, freq="B"),
  1264. )
  1265. df2 = df1.copy()
  1266. row = simple_frame.xs("a")
  1267. ndim_5 = np.ones(df1.shape + (1, 1, 1))
  1268. result = func(df1, df2)
  1269. tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values))
  1270. msg = (
  1271. "Unable to coerce to Series/DataFrame, "
  1272. "dimension must be <= 2: (30, 4, 1, 1, 1)"
  1273. )
  1274. with pytest.raises(ValueError, match=re.escape(msg)):
  1275. func(df1, ndim_5)
  1276. result2 = func(simple_frame, row)
  1277. tm.assert_numpy_array_equal(
  1278. result2.values, func(simple_frame.values, row.values)
  1279. )
  1280. result3 = func(float_frame, 0)
  1281. tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0))
  1282. msg = (
  1283. r"Can only compare identically-labeled \(both index and columns\) "
  1284. "DataFrame objects"
  1285. )
  1286. with pytest.raises(ValueError, match=msg):
  1287. func(simple_frame, simple_frame[:2])
  1288. def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
  1289. # GH 11565
  1290. df = DataFrame(
  1291. {x: {"x": "foo", "y": "bar", "z": "baz"} for x in ["a", "b", "c"]}
  1292. )
  1293. f = getattr(operator, compare_operators_no_eq_ne)
  1294. msg = "|".join(
  1295. [
  1296. "'[<>]=?' not supported between instances of 'str' and 'int'",
  1297. "Invalid comparison between dtype=str and int",
  1298. ]
  1299. )
  1300. with pytest.raises(TypeError, match=msg):
  1301. f(df, 0)
  1302. def test_comparison_protected_from_errstate(self):
  1303. missing_df = DataFrame(
  1304. np.ones((10, 4), dtype=np.float64),
  1305. columns=Index(list("ABCD"), dtype=object),
  1306. )
  1307. missing_df.loc[missing_df.index[0], "A"] = np.nan
  1308. with np.errstate(invalid="ignore"):
  1309. expected = missing_df.values < 0
  1310. with np.errstate(invalid="raise"):
  1311. result = (missing_df < 0).values
  1312. tm.assert_numpy_array_equal(result, expected)
  1313. def test_boolean_comparison(self):
  1314. # GH 4576
  1315. # boolean comparisons with a tuple/list give unexpected results
  1316. df = DataFrame(np.arange(6).reshape((3, 2)))
  1317. b = np.array([2, 2])
  1318. b_r = np.atleast_2d([2, 2])
  1319. b_c = b_r.T
  1320. lst = [2, 2, 2]
  1321. tup = tuple(lst)
  1322. # gt
  1323. expected = DataFrame([[False, False], [False, True], [True, True]])
  1324. result = df > b
  1325. tm.assert_frame_equal(result, expected)
  1326. result = df.values > b
  1327. tm.assert_numpy_array_equal(result, expected.values)
  1328. msg1d = "Unable to coerce to Series, length must be 2: given 3"
  1329. msg2d = "Unable to coerce to DataFrame, shape must be"
  1330. msg2db = "operands could not be broadcast together with shapes"
  1331. with pytest.raises(ValueError, match=msg1d):
  1332. # wrong shape
  1333. df > lst
  1334. with pytest.raises(ValueError, match=msg1d):
  1335. # wrong shape
  1336. df > tup
  1337. # broadcasts like ndarray (GH#23000)
  1338. result = df > b_r
  1339. tm.assert_frame_equal(result, expected)
  1340. result = df.values > b_r
  1341. tm.assert_numpy_array_equal(result, expected.values)
  1342. with pytest.raises(ValueError, match=msg2d):
  1343. df > b_c
  1344. with pytest.raises(ValueError, match=msg2db):
  1345. df.values > b_c
  1346. # ==
  1347. expected = DataFrame([[False, False], [True, False], [False, False]])
  1348. result = df == b
  1349. tm.assert_frame_equal(result, expected)
  1350. with pytest.raises(ValueError, match=msg1d):
  1351. df == lst
  1352. with pytest.raises(ValueError, match=msg1d):
  1353. df == tup
  1354. # broadcasts like ndarray (GH#23000)
  1355. result = df == b_r
  1356. tm.assert_frame_equal(result, expected)
  1357. result = df.values == b_r
  1358. tm.assert_numpy_array_equal(result, expected.values)
  1359. with pytest.raises(ValueError, match=msg2d):
  1360. df == b_c
  1361. assert df.values.shape != b_c.shape
  1362. # with alignment
  1363. df = DataFrame(
  1364. np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc")
  1365. )
  1366. expected.index = df.index
  1367. expected.columns = df.columns
  1368. with pytest.raises(ValueError, match=msg1d):
  1369. df == lst
  1370. with pytest.raises(ValueError, match=msg1d):
  1371. df == tup
  1372. def test_inplace_ops_alignment(self):
  1373. # inplace ops / ops alignment
  1374. # GH 8511
  1375. columns = list("abcdefg")
  1376. X_orig = DataFrame(
  1377. np.arange(10 * len(columns)).reshape(-1, len(columns)),
  1378. columns=columns,
  1379. index=range(10),
  1380. )
  1381. Z = 100 * X_orig.iloc[:, 1:-1].copy()
  1382. block1 = list("bedcf")
  1383. subs = list("bcdef")
  1384. # add
  1385. X = X_orig.copy()
  1386. result1 = (X[block1] + Z).reindex(columns=subs)
  1387. X[block1] += Z
  1388. result2 = X.reindex(columns=subs)
  1389. X = X_orig.copy()
  1390. result3 = (X[block1] + Z[block1]).reindex(columns=subs)
  1391. X[block1] += Z[block1]
  1392. result4 = X.reindex(columns=subs)
  1393. tm.assert_frame_equal(result1, result2)
  1394. tm.assert_frame_equal(result1, result3)
  1395. tm.assert_frame_equal(result1, result4)
  1396. # sub
  1397. X = X_orig.copy()
  1398. result1 = (X[block1] - Z).reindex(columns=subs)
  1399. X[block1] -= Z
  1400. result2 = X.reindex(columns=subs)
  1401. X = X_orig.copy()
  1402. result3 = (X[block1] - Z[block1]).reindex(columns=subs)
  1403. X[block1] -= Z[block1]
  1404. result4 = X.reindex(columns=subs)
  1405. tm.assert_frame_equal(result1, result2)
  1406. tm.assert_frame_equal(result1, result3)
  1407. tm.assert_frame_equal(result1, result4)
  1408. def test_inplace_ops_identity(self):
  1409. # GH 5104
  1410. # make sure that we are actually changing the object
  1411. s_orig = Series([1, 2, 3])
  1412. df_orig = DataFrame(
  1413. np.random.default_rng(2).integers(0, 5, size=10).reshape(-1, 5)
  1414. )
  1415. # no dtype change
  1416. s = s_orig.copy()
  1417. s2 = s
  1418. s += 1
  1419. tm.assert_series_equal(s, s2)
  1420. tm.assert_series_equal(s_orig + 1, s)
  1421. assert s is s2
  1422. assert s._mgr is s2._mgr
  1423. df = df_orig.copy()
  1424. df2 = df
  1425. df += 1
  1426. tm.assert_frame_equal(df, df2)
  1427. tm.assert_frame_equal(df_orig + 1, df)
  1428. assert df is df2
  1429. assert df._mgr is df2._mgr
  1430. # dtype change
  1431. s = s_orig.copy()
  1432. s2 = s
  1433. s += 1.5
  1434. tm.assert_series_equal(s, s2)
  1435. tm.assert_series_equal(s_orig + 1.5, s)
  1436. df = df_orig.copy()
  1437. df2 = df
  1438. df += 1.5
  1439. tm.assert_frame_equal(df, df2)
  1440. tm.assert_frame_equal(df_orig + 1.5, df)
  1441. assert df is df2
  1442. assert df._mgr is df2._mgr
  1443. # mixed dtype
  1444. arr = np.random.default_rng(2).integers(0, 10, size=5)
  1445. df_orig = DataFrame({"A": arr.copy(), "B": "foo"})
  1446. df = df_orig.copy()
  1447. df2 = df
  1448. df["A"] += 1
  1449. expected = DataFrame({"A": arr.copy() + 1, "B": "foo"})
  1450. tm.assert_frame_equal(df, expected)
  1451. tm.assert_frame_equal(df2, expected)
  1452. assert df._mgr is df2._mgr
  1453. df = df_orig.copy()
  1454. df2 = df
  1455. df["A"] += 1.5
  1456. expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"})
  1457. tm.assert_frame_equal(df, expected)
  1458. tm.assert_frame_equal(df2, expected)
  1459. assert df._mgr is df2._mgr
  @pytest.mark.parametrize(
      "op",
      [
          "add",
          "and",
          pytest.param(
              "div",
              marks=pytest.mark.xfail(
                  raises=AttributeError, reason="__idiv__ not implemented"
              ),
          ),
          "floordiv",
          "mod",
          "mul",
          "or",
          "pow",
          "sub",
          "truediv",
          "xor",
      ],
  )
  def test_inplace_ops_identity2(self, op):
      """Check each in-place dunder updates ``df`` and hands back ``df`` itself.

      Parameters
      ----------
      op : str
          Bare operator name (e.g. ``"add"``); the ``__i<op>__`` and
          ``__<op>__`` method names are derived from it.
      """
      df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})

      operand = 2
      if op in ("and", "or", "xor"):
          # cannot use floats for boolean ops
          df["a"] = [True, False, True]

      df_copy = df.copy()
      inplace_name = f"__i{op}__"
      plain_name = f"__{op}__"

      # Keep what the in-place dunder returns: that object is what
      # ``df <op>= operand`` would rebind ``df`` to.
      returned = getattr(df, inplace_name)(operand)

      # value is correct
      expected = getattr(df_copy, plain_name)(operand)
      tm.assert_frame_equal(df, expected)

      # no id change: previously ``id(df)`` was captured *after* the operation
      # and compared with itself, which could never fail.
      assert returned is df
  @pytest.mark.parametrize(
      "val",
      [
          [1, 2, 3],
          (1, 2, 3),
          np.array([1, 2, 3], dtype=np.int64),
          range(1, 4),
      ],
  )
  def test_alignment_non_pandas(self, val):
      """Check how ``DataFrame._align_for_op`` expands a length-3 list-like.

      With ``axis=0`` every column of the aligned operand holds ``val``;
      with ``axis=1`` every row of the aligned operand holds ``1, 2, 3``.
      """
      row_labels = ["A", "B", "C"]
      col_labels = ["X", "Y", "Z"]
      df = DataFrame(
          np.random.default_rng(2).standard_normal((3, 3)),
          index=row_labels,
          columns=col_labels,
      )
      align = DataFrame._align_for_op

      # Element [1] of the returned pair is the aligned right-hand operand.
      along_index = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index)
      tm.assert_frame_equal(align(df, val, axis=0)[1], along_index)

      along_columns = DataFrame(
          {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index
      )
      tm.assert_frame_equal(align(df, val, axis=1)[1], along_columns)
  @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)])
  def test_alignment_non_pandas_length_mismatch(self, val):
      """Check that a length-2 list-like is rejected against a 3x3 frame.

      ``DataFrame._align_for_op`` must raise ``ValueError`` for both axes.
      """
      row_labels = ["A", "B", "C"]
      col_labels = ["X", "Y", "Z"]
      df = DataFrame(
          np.random.default_rng(2).standard_normal((3, 3)),
          index=row_labels,
          columns=col_labels,
      )
      align = DataFrame._align_for_op

      # length mismatch
      msg = "Unable to coerce to Series, length must be 3: given 2"
      for axis in (0, 1):
          with pytest.raises(ValueError, match=msg):
              align(df, val, axis=axis)
  def test_alignment_non_pandas_index_columns(self):
      """Check ``DataFrame._align_for_op`` with 2-D and 3-D ndarray operands.

      A matching 3x3 array is wrapped with the frame's index and columns;
      a 2x3 array and a 3-D array both raise ``ValueError``.
      """
      row_labels = ["A", "B", "C"]
      col_labels = ["X", "Y", "Z"]
      df = DataFrame(
          np.random.default_rng(2).standard_normal((3, 3)),
          index=row_labels,
          columns=col_labels,
      )
      align = DataFrame._align_for_op

      val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
      for axis in (0, 1):
          tm.assert_frame_equal(
              align(df, val, axis=axis)[1],
              DataFrame(val, index=df.index, columns=df.columns),
          )

      # shape mismatch
      msg = "Unable to coerce to DataFrame, shape must be"
      val = np.array([[1, 2, 3], [4, 5, 6]])
      for axis in (0, 1):
          with pytest.raises(ValueError, match=msg):
              align(df, val, axis=axis)

      # too many dimensions
      val = np.zeros((3, 3, 3))
      msg = re.escape(
          "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)"
      )
      for axis in (0, 1):
          with pytest.raises(ValueError, match=msg):
              align(df, val, axis=axis)
  1569. def test_no_warning(self, all_arithmetic_operators):
  1570. df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
  1571. b = df["B"]
  1572. with tm.assert_produces_warning(None):
  1573. getattr(df, all_arithmetic_operators)(b)
  def test_dunder_methods_binary(self, all_arithmetic_operators):
      """Check that arithmetic dunders reject a second positional operand."""
      # GH#??? frame.__foo__ should only accept one argument
      frame = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
      column = frame["B"]
      with pytest.raises(TypeError, match="takes 2 positional arguments"):
          method = getattr(frame, all_arithmetic_operators)
          method(column, 0)
  1580. def test_align_int_fill_bug(self):
  1581. # GH#910
  1582. X = np.arange(10 * 10, dtype="float64").reshape(10, 10)
  1583. Y = np.ones((10, 1), dtype=int)
  1584. df1 = DataFrame(X)
  1585. df1["0.X"] = Y.squeeze()
  1586. df2 = df1.astype(float)
  1587. result = df1 - df1.mean()
  1588. expected = df2 - df2.mean()
  1589. tm.assert_frame_equal(result, expected)
  def test_pow_with_realignment():
      """Check ``**`` between a frame and a column-less frame on the same index.

      GH#32685 pow has special semantics for operating with null values:
      the row whose base is 1 stays 1.0 while the other rows become NaN.
      """
      base = DataFrame({"A": [0, 1, 2]})
      exponent = DataFrame(index=[0, 1, 2])

      result = base**exponent

      expected = DataFrame({"A": [np.nan, 1.0, np.nan]})
      tm.assert_frame_equal(result, expected)
  def test_dataframe_series_extension_dtypes():
      """Check that adding a Series to an ``Int64`` frame keeps ``Int64``.

      https://github.com/pandas-dev/pandas/issues/34311
      The expected values are computed with plain NumPy int64 arithmetic.
      """
      frame = DataFrame(
          np.random.default_rng(2).integers(0, 100, (10, 3)), columns=["a", "b", "c"]
      )
      row = Series([1, 2, 3], index=["a", "b", "c"])

      raw_sum = frame.to_numpy("int64") + row.to_numpy("int64").reshape(-1, 3)
      expected = DataFrame(raw_sum, columns=frame.columns, dtype="Int64")

      nullable_frame = frame.astype("Int64")

      # NumPy-backed Series operand
      tm.assert_frame_equal(nullable_frame + row, expected)

      # nullable-integer Series operand
      tm.assert_frame_equal(nullable_frame + row.astype("Int64"), expected)
  def test_dataframe_blockwise_slicelike():
      """Check frame + frame against column-by-column addition.

      GH#34367: the operands are copies of one integer frame in which
      different groups of columns were cast to float and given a NaN in
      the first row.
      """
      values = np.random.default_rng(2).integers(0, 1000, (100, 10))
      df1 = DataFrame(values)

      def _float_columns_with_nan(positions):
          # Explicit cast to float to avoid implicit cast when setting nan
          frame = df1.copy().astype({int(pos): "float" for pos in positions})
          frame.iloc[0, positions] = np.nan
          return frame

      df2 = _float_columns_with_nan([1, 3, 7])
      df3 = _float_columns_with_nan([5])
      df4 = _float_columns_with_nan(np.arange(2, 5))
      df5 = _float_columns_with_nan(np.arange(4, 7))

      pairs = [(df1, df2), (df2, df3), (df4, df5)]
      for left, right in pairs:
          whole_frame = left + right
          per_column = DataFrame({col: left[col] + right[col] for col in left.columns})
          tm.assert_frame_equal(whole_frame, per_column)
  @pytest.mark.parametrize(
      "df, col_dtype",
      [
          (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"),
          (
              DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")).astype(
                  {"b": object}
              ),
              "object",
          ),
      ],
  )
  def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
      """Check adding a Series that only covers column ``"a"``.

      GH #22663: column ``"b"`` has no matching Series entry, so it is
      expected to come back as NaN with dtype ``col_dtype``.
      """
      addend = Series([-1.0], index=list("a"))

      expected = DataFrame(
          [[0.0, np.nan], [3.0, np.nan]], columns=list("ab")
      ).astype({"b": col_dtype})

      result = df + addend
      tm.assert_frame_equal(result, expected)
  def test_arith_reindex_with_duplicates():
      """Check frame addition when one operand has a duplicated column label.

      https://github.com/pandas-dev/pandas/issues/35194
      """
      single = DataFrame(data=[[0]], columns=["second"])
      duplicated = DataFrame(
          data=[[0, 0, 0]], columns=["first", "second", "second"]
      )

      result = single + duplicated

      expected = DataFrame(
          [[np.nan, 0, 0]], columns=["first", "second", "second"]
      )
      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]])
  def test_arith_list_of_arraylike_raise(to_add):
      """Check that adding a list of array-likes to a DataFrame raises.

      GH 36702. Both ``df + to_add`` and ``to_add + df`` must raise
      ``ValueError`` naming the element type of the list.

      Parameters
      ----------
      to_add : list of Series
          The list operand; only the type of its first element appears in
          the expected message.
      """
      df = DataFrame({"x": [1, 2], "y": [1, 2]})

      # ``match`` is treated as a regular expression and the type repr
      # contains dots, so escape it to require the literal message.
      msg = re.escape(
          f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame"
      )
      with pytest.raises(ValueError, match=msg):
          df + to_add
      with pytest.raises(ValueError, match=msg):
          to_add + df
  def test_inplace_arithmetic_series_update(using_copy_on_write, warn_copy_on_write):
      """Check what ``series += 1`` does to the frame the Series came from.

      https://github.com/pandas-dev/pandas/issues/36373
      With copy-on-write the Series gets new values and the parent frame is
      unchanged; otherwise the values object is reused and the parent frame
      shows the incremented values.
      """
      df = DataFrame({"A": [1, 2, 3]})
      untouched = df.copy()

      series = df["A"]
      values_before = series._values

      with tm.assert_cow_warning(warn_copy_on_write):
          series += 1

      if not using_copy_on_write:
          assert series._values is values_before
          incremented = DataFrame({"A": [2, 3, 4]})
          tm.assert_frame_equal(df, incremented)
          return

      assert series._values is not values_before
      tm.assert_frame_equal(df, untouched)
  def test_arithmetic_multiindex_align():
      """
      Regression test for: https://github.com/pandas-dev/pandas/issues/33765

      Subtracts a frame with single-level columns named "a" from a frame
      whose columns are a two-level MultiIndex named ("a", "b").
      """

      def _two_level_columns():
          return MultiIndex.from_product([[0], [1]], names=["a", "b"])

      minuend = DataFrame([[1]], index=["a"], columns=_two_level_columns())
      subtrahend = DataFrame([[1]], index=["a"], columns=Index([0], name="a"))
      expected = DataFrame([[0]], index=["a"], columns=_two_level_columns())

      result = minuend - subtrahend
      tm.assert_frame_equal(result, expected)
  def test_bool_frame_mult_float():
      """Check that an all-``True`` frame times ``1.0`` gives a frame of ones.

      GH 18549
      """
      row_labels = list("ab")
      col_labels = list("cd")
      all_true = DataFrame(True, row_labels, col_labels)

      result = all_true * 1.0

      expected = DataFrame(np.ones((2, 2)), row_labels, col_labels)
      tm.assert_frame_equal(result, expected)
  1702. def test_frame_sub_nullable_int(any_int_ea_dtype):
  1703. # GH 32822
  1704. series1 = Series([1, 2, None], dtype=any_int_ea_dtype)
  1705. series2 = Series([1, 2, 3], dtype=any_int_ea_dtype)
  1706. expected = DataFrame([0, 0, None], dtype=any_int_ea_dtype)
  1707. result = series1.to_frame() - series2.to_frame()
  1708. tm.assert_frame_equal(result, expected)
  @pytest.mark.filterwarnings(
      "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
  )
  def test_frame_op_subclass_nonclass_constructor():
      """Check arithmetic on a subclass whose ``_constructor`` is not a class.

      GH#43201 subclass._constructor is a function, not the subclass itself:
      here it is a ``functools.partial`` that pre-binds the extra
      ``my_extra_data`` argument required by the subclass ``__init__``.
      """

      class SubclassedSeries(Series):
          @property
          def _constructor(self):
              return SubclassedSeries

          @property
          def _constructor_expanddim(self):
              return SubclassedDataFrame

      class SubclassedDataFrame(DataFrame):
          _metadata = ["my_extra_data"]

          def __init__(self, my_extra_data, *args, **kwargs) -> None:
              # Store the extra argument first, then let DataFrame consume the rest.
              self.my_extra_data = my_extra_data
              super().__init__(*args, **kwargs)

          @property
          def _constructor(self):
              return functools.partial(type(self), self.my_extra_data)

          @property
          def _constructor_sliced(self):
              return SubclassedSeries

      frame = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]})
      doubled = SubclassedDataFrame(
          "some_data", {"A": [2, 4, 6], "B": [8, 10, 12]}
      )

      # frame-with-scalar operation
      tm.assert_frame_equal(frame * 2, doubled)

      # frame-with-frame operation
      tm.assert_frame_equal(frame + frame, doubled)
  def test_enum_column_equality():
      """Check ``==`` between two columns whose label is an ``Enum`` member.

      The comparison is expected to be all ``True`` and to keep the enum
      member as the resulting Series name.
      """
      Cols = Enum("Cols", "col1 col2")
      label = Cols.col1

      left = DataFrame({label: [1, 2, 3]})
      right = DataFrame({label: [1, 2, 3]})

      result = left[label] == right[label]

      expected = Series([True, True, True], name=label)
      tm.assert_series_equal(result, expected)
  1745. def test_mixed_col_index_dtype(using_infer_string):
  1746. # GH 47382
  1747. df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
  1748. df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
  1749. df1.columns = df2.columns.astype("string")
  1750. result = df1 + df2
  1751. expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
  1752. if using_infer_string:
  1753. # df2.columns.dtype will be "str" instead of object,
  1754. # so the aligned result will be "string", not object
  1755. if HAS_PYARROW:
  1756. dtype = "string[pyarrow]"
  1757. else:
  1758. dtype = "string"
  1759. expected.columns = expected.columns.astype(dtype)
  1760. tm.assert_frame_equal(result, expected)