test_eval.py 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006
  1. from __future__ import annotations
  2. from functools import reduce
  3. from itertools import product
  4. import operator
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import PY312
  8. from pandas.compat._optional import import_optional_dependency
  9. from pandas.errors import (
  10. NumExprClobberingError,
  11. PerformanceWarning,
  12. UndefinedVariableError,
  13. )
  14. import pandas.util._test_decorators as td
  15. from pandas.core.dtypes.common import (
  16. is_bool,
  17. is_float,
  18. is_list_like,
  19. is_scalar,
  20. )
  21. import pandas as pd
  22. from pandas import (
  23. DataFrame,
  24. Index,
  25. Series,
  26. date_range,
  27. period_range,
  28. timedelta_range,
  29. )
  30. import pandas._testing as tm
  31. from pandas.core.computation import (
  32. expr,
  33. pytables,
  34. )
  35. from pandas.core.computation.engines import ENGINES
  36. from pandas.core.computation.expr import (
  37. BaseExprVisitor,
  38. PandasExprVisitor,
  39. PythonExprVisitor,
  40. )
  41. from pandas.core.computation.expressions import (
  42. NUMEXPR_INSTALLED,
  43. USE_NUMEXPR,
  44. )
  45. from pandas.core.computation.ops import (
  46. ARITH_OPS_SYMS,
  47. SPECIAL_CASE_ARITH_OPS_SYMS,
  48. _binary_math_ops,
  49. _binary_ops_dict,
  50. _unary_math_ops,
  51. )
  52. from pandas.core.computation.scope import DEFAULT_GLOBALS
  53. from pandas.util.version import Version
# numexpr is optional; ``errors="ignore"`` is passed so that a missing
# installation does not raise at import time.
# NOTE(review): presumably ``numexpr`` is ``None`` in that case - confirm.
numexpr = import_optional_dependency("numexpr", errors="ignore")
  55. @pytest.fixture(
  56. params=(
  57. pytest.param(
  58. engine,
  59. marks=[
  60. pytest.mark.skipif(
  61. engine == "numexpr" and not USE_NUMEXPR,
  62. reason=f"numexpr enabled->{USE_NUMEXPR}, "
  63. f"installed->{NUMEXPR_INSTALLED}",
  64. ),
  65. td.skip_if_no("numexpr"),
  66. ],
  67. )
  68. for engine in ENGINES
  69. )
  70. )
  71. def engine(request):
  72. return request.param
  73. @pytest.fixture(params=expr.PARSERS)
  74. def parser(request):
  75. return request.param
  76. def _eval_single_bin(lhs, cmp1, rhs, engine):
  77. c = _binary_ops_dict[cmp1]
  78. if ENGINES[engine].has_neg_frac:
  79. try:
  80. return c(lhs, rhs)
  81. except ValueError as e:
  82. if str(e).startswith(
  83. "negative number cannot be raised to a fractional power"
  84. ):
  85. return np.nan
  86. raise
  87. return c(lhs, rhs)
  88. # TODO: using range(5) here is a kludge
  89. @pytest.fixture(
  90. params=list(range(5)),
  91. ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"],
  92. )
  93. def lhs(request):
  94. nan_df1 = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
  95. nan_df1[nan_df1 > 0.5] = np.nan
  96. opts = (
  97. DataFrame(np.random.default_rng(2).standard_normal((10, 5))),
  98. Series(np.random.default_rng(2).standard_normal(5)),
  99. Series([1, 2, np.nan, np.nan, 5]),
  100. nan_df1,
  101. np.random.default_rng(2).standard_normal(),
  102. )
  103. return opts[request.param]
# Expose the ``lhs`` fixture under two further names so that a test can
# request left, middle and right operands as separate fixtures (see
# ``test_chained_cmp_op``, which takes ``lhs``, ``midhs`` and ``rhs``).
rhs = lhs
midhs = lhs
  106. @pytest.fixture
  107. def idx_func_dict():
  108. return {
  109. "i": lambda n: Index(np.arange(n), dtype=np.int64),
  110. "f": lambda n: Index(np.arange(n), dtype=np.float64),
  111. "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]),
  112. "dt": lambda n: date_range("2020-01-01", periods=n),
  113. "td": lambda n: timedelta_range("1 day", periods=n),
  114. "p": lambda n: period_range("2020-01-01", periods=n, freq="D"),
  115. }
  116. class TestEval:
  117. @pytest.mark.parametrize(
  118. "cmp1",
  119. ["!=", "==", "<=", ">=", "<", ">"],
  120. ids=["ne", "eq", "le", "ge", "lt", "gt"],
  121. )
  122. @pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"])
  123. @pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS)
  124. def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser):
  125. if parser == "python" and binop in ["and", "or"]:
  126. msg = "'BoolOp' nodes are not implemented"
  127. with pytest.raises(NotImplementedError, match=msg):
  128. ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"
  129. pd.eval(ex, engine=engine, parser=parser)
  130. return
  131. lhs_new = _eval_single_bin(lhs, cmp1, rhs, engine)
  132. rhs_new = _eval_single_bin(lhs, cmp2, rhs, engine)
  133. expected = _eval_single_bin(lhs_new, binop, rhs_new, engine)
  134. ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"
  135. result = pd.eval(ex, engine=engine, parser=parser)
  136. tm.assert_equal(result, expected)
  137. @pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS)
  138. def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser):
  139. lhs = lhs < 0
  140. rhs = rhs < 0
  141. if parser == "python" and cmp_op in ["in", "not in"]:
  142. msg = "'(In|NotIn)' nodes are not implemented"
  143. with pytest.raises(NotImplementedError, match=msg):
  144. ex = f"lhs {cmp_op} rhs"
  145. pd.eval(ex, engine=engine, parser=parser)
  146. return
  147. ex = f"lhs {cmp_op} rhs"
  148. msg = "|".join(
  149. [
  150. r"only list-like( or dict-like)? objects are allowed to be "
  151. r"passed to (DataFrame\.)?isin\(\), you passed a "
  152. r"(`|')bool(`|')",
  153. "argument of type 'bool' is not iterable",
  154. ]
  155. )
  156. if cmp_op in ("in", "not in") and not is_list_like(rhs):
  157. with pytest.raises(TypeError, match=msg):
  158. pd.eval(
  159. ex,
  160. engine=engine,
  161. parser=parser,
  162. local_dict={"lhs": lhs, "rhs": rhs},
  163. )
  164. else:
  165. expected = _eval_single_bin(lhs, cmp_op, rhs, engine)
  166. result = pd.eval(ex, engine=engine, parser=parser)
  167. tm.assert_equal(result, expected)
  168. @pytest.mark.parametrize("op", expr.CMP_OPS_SYMS)
  169. def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser):
  170. if parser == "python" and op in ["in", "not in"]:
  171. msg = "'(In|NotIn)' nodes are not implemented"
  172. with pytest.raises(NotImplementedError, match=msg):
  173. ex = f"~(lhs {op} rhs)"
  174. pd.eval(ex, engine=engine, parser=parser)
  175. return
  176. if (
  177. is_float(lhs)
  178. and not is_float(rhs)
  179. and op in ["in", "not in"]
  180. and engine == "python"
  181. and parser == "pandas"
  182. ):
  183. mark = pytest.mark.xfail(
  184. reason="Looks like expected is negative, unclear whether "
  185. "expected is incorrect or result is incorrect"
  186. )
  187. request.applymarker(mark)
  188. skip_these = ["in", "not in"]
  189. ex = f"~(lhs {op} rhs)"
  190. msg = "|".join(
  191. [
  192. r"only list-like( or dict-like)? objects are allowed to be "
  193. r"passed to (DataFrame\.)?isin\(\), you passed a "
  194. r"(`|')float(`|')",
  195. "argument of type 'float' is not iterable",
  196. ]
  197. )
  198. if is_scalar(rhs) and op in skip_these:
  199. with pytest.raises(TypeError, match=msg):
  200. pd.eval(
  201. ex,
  202. engine=engine,
  203. parser=parser,
  204. local_dict={"lhs": lhs, "rhs": rhs},
  205. )
  206. else:
  207. # compound
  208. if is_scalar(lhs) and is_scalar(rhs):
  209. lhs, rhs = (np.array([x]) for x in (lhs, rhs))
  210. expected = _eval_single_bin(lhs, op, rhs, engine)
  211. if is_scalar(expected):
  212. expected = not expected
  213. else:
  214. expected = ~expected
  215. result = pd.eval(ex, engine=engine, parser=parser)
  216. tm.assert_almost_equal(expected, result)
  217. @pytest.mark.parametrize("cmp1", ["<", ">"])
  218. @pytest.mark.parametrize("cmp2", ["<", ">"])
  219. def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser):
  220. mid = midhs
  221. if parser == "python":
  222. ex1 = f"lhs {cmp1} mid {cmp2} rhs"
  223. msg = "'BoolOp' nodes are not implemented"
  224. with pytest.raises(NotImplementedError, match=msg):
  225. pd.eval(ex1, engine=engine, parser=parser)
  226. return
  227. lhs_new = _eval_single_bin(lhs, cmp1, mid, engine)
  228. rhs_new = _eval_single_bin(mid, cmp2, rhs, engine)
  229. if lhs_new is not None and rhs_new is not None:
  230. ex1 = f"lhs {cmp1} mid {cmp2} rhs"
  231. ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs"
  232. ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)"
  233. expected = _eval_single_bin(lhs_new, "&", rhs_new, engine)
  234. for ex in (ex1, ex2, ex3):
  235. result = pd.eval(ex, engine=engine, parser=parser)
  236. tm.assert_almost_equal(result, expected)
  237. @pytest.mark.parametrize(
  238. "arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS))
  239. )
  240. def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser):
  241. ex = f"lhs {arith1} rhs"
  242. result = pd.eval(ex, engine=engine, parser=parser)
  243. expected = _eval_single_bin(lhs, arith1, rhs, engine)
  244. tm.assert_almost_equal(result, expected)
  245. ex = f"lhs {arith1} rhs {arith1} rhs"
  246. result = pd.eval(ex, engine=engine, parser=parser)
  247. nlhs = _eval_single_bin(lhs, arith1, rhs, engine)
  248. try:
  249. nlhs, ghs = nlhs.align(rhs)
  250. except (ValueError, TypeError, AttributeError):
  251. # ValueError: series frame or frame series align
  252. # TypeError, AttributeError: series or frame with scalar align
  253. return
  254. else:
  255. if engine == "numexpr":
  256. import numexpr as ne
  257. # direct numpy comparison
  258. expected = ne.evaluate(f"nlhs {arith1} ghs")
  259. # Update assert statement due to unreliable numerical
  260. # precision component (GH37328)
  261. # TODO: update testing code so that assert_almost_equal statement
  262. # can be replaced again by the assert_numpy_array_equal statement
  263. tm.assert_almost_equal(result.values, expected)
  264. else:
  265. expected = eval(f"nlhs {arith1} ghs")
  266. tm.assert_almost_equal(result, expected)
  267. # modulus, pow, and floor division require special casing
  268. def test_modulus(self, lhs, rhs, engine, parser):
  269. ex = r"lhs % rhs"
  270. result = pd.eval(ex, engine=engine, parser=parser)
  271. expected = lhs % rhs
  272. tm.assert_almost_equal(result, expected)
  273. if engine == "numexpr":
  274. import numexpr as ne
  275. expected = ne.evaluate(r"expected % rhs")
  276. if isinstance(result, (DataFrame, Series)):
  277. tm.assert_almost_equal(result.values, expected)
  278. else:
  279. tm.assert_almost_equal(result, expected.item())
  280. else:
  281. expected = _eval_single_bin(expected, "%", rhs, engine)
  282. tm.assert_almost_equal(result, expected)
  283. def test_floor_division(self, lhs, rhs, engine, parser):
  284. ex = "lhs // rhs"
  285. if engine == "python" or (
  286. engine == "numexpr" and Version(numexpr.__version__) >= Version("2.13.0")
  287. ):
  288. res = pd.eval(ex, engine=engine, parser=parser)
  289. expected = lhs // rhs
  290. tm.assert_equal(res, expected)
  291. else:
  292. msg = (
  293. r"unsupported operand type\(s\) for //: 'VariableNode' and "
  294. "'VariableNode'"
  295. )
  296. with pytest.raises(TypeError, match=msg):
  297. pd.eval(
  298. ex,
  299. local_dict={"lhs": lhs, "rhs": rhs},
  300. engine=engine,
  301. parser=parser,
  302. )
  303. @td.skip_if_windows
  304. def test_pow(self, lhs, rhs, engine, parser):
  305. # odd failure on win32 platform, so skip
  306. ex = "lhs ** rhs"
  307. expected = _eval_single_bin(lhs, "**", rhs, engine)
  308. result = pd.eval(ex, engine=engine, parser=parser)
  309. if (
  310. is_scalar(lhs)
  311. and is_scalar(rhs)
  312. and isinstance(expected, (complex, np.complexfloating))
  313. and np.isnan(result)
  314. ):
  315. msg = "(DataFrame.columns|numpy array) are different"
  316. with pytest.raises(AssertionError, match=msg):
  317. tm.assert_numpy_array_equal(result, expected)
  318. else:
  319. tm.assert_almost_equal(result, expected)
  320. ex = "(lhs ** rhs) ** rhs"
  321. result = pd.eval(ex, engine=engine, parser=parser)
  322. middle = _eval_single_bin(lhs, "**", rhs, engine)
  323. expected = _eval_single_bin(middle, "**", rhs, engine)
  324. tm.assert_almost_equal(result, expected)
  325. def test_check_single_invert_op(self, lhs, engine, parser):
  326. # simple
  327. try:
  328. elb = lhs.astype(bool)
  329. except AttributeError:
  330. elb = np.array([bool(lhs)])
  331. expected = ~elb
  332. result = pd.eval("~elb", engine=engine, parser=parser)
  333. tm.assert_almost_equal(expected, result)
  334. def test_frame_invert(self, engine, parser):
  335. expr = "~lhs"
  336. # ~ ##
  337. # frame
  338. # float always raises
  339. lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)))
  340. if engine == "numexpr":
  341. msg = "couldn't find matching opcode for 'invert_dd'"
  342. with pytest.raises(NotImplementedError, match=msg):
  343. pd.eval(expr, engine=engine, parser=parser)
  344. else:
  345. msg = "ufunc 'invert' not supported for the input types"
  346. with pytest.raises(TypeError, match=msg):
  347. pd.eval(expr, engine=engine, parser=parser)
  348. # int raises on numexpr
  349. lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2)))
  350. if engine == "numexpr" and Version(numexpr.__version__) < Version("2.13.0"):
  351. msg = "couldn't find matching opcode for 'invert"
  352. with pytest.raises(NotImplementedError, match=msg):
  353. pd.eval(expr, engine=engine, parser=parser)
  354. else:
  355. expect = ~lhs
  356. result = pd.eval(expr, engine=engine, parser=parser)
  357. tm.assert_frame_equal(expect, result)
  358. # bool always works
  359. lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5)
  360. expect = ~lhs
  361. result = pd.eval(expr, engine=engine, parser=parser)
  362. tm.assert_frame_equal(expect, result)
  363. # object raises
  364. lhs = DataFrame(
  365. {"b": ["a", 1, 2.0], "c": np.random.default_rng(2).standard_normal(3) > 0.5}
  366. )
  367. if engine == "numexpr":
  368. with pytest.raises(ValueError, match="unknown type object"):
  369. pd.eval(expr, engine=engine, parser=parser)
  370. else:
  371. msg = "bad operand type for unary ~: 'str'"
  372. with pytest.raises(TypeError, match=msg):
  373. pd.eval(expr, engine=engine, parser=parser)
  374. def test_series_invert(self, engine, parser):
  375. # ~ ####
  376. expr = "~lhs"
  377. # series
  378. # float raises
  379. lhs = Series(np.random.default_rng(2).standard_normal(5))
  380. if engine == "numexpr":
  381. msg = "couldn't find matching opcode for 'invert_dd'"
  382. with pytest.raises(NotImplementedError, match=msg):
  383. result = pd.eval(expr, engine=engine, parser=parser)
  384. else:
  385. msg = "ufunc 'invert' not supported for the input types"
  386. with pytest.raises(TypeError, match=msg):
  387. pd.eval(expr, engine=engine, parser=parser)
  388. # int raises on numexpr
  389. lhs = Series(np.random.default_rng(2).integers(5, size=5))
  390. if engine == "numexpr" and Version(numexpr.__version__) < Version("2.13.0"):
  391. msg = "couldn't find matching opcode for 'invert"
  392. with pytest.raises(NotImplementedError, match=msg):
  393. pd.eval(expr, engine=engine, parser=parser)
  394. else:
  395. expect = ~lhs
  396. result = pd.eval(expr, engine=engine, parser=parser)
  397. tm.assert_series_equal(expect, result)
  398. # bool
  399. lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5)
  400. expect = ~lhs
  401. result = pd.eval(expr, engine=engine, parser=parser)
  402. tm.assert_series_equal(expect, result)
  403. # float
  404. # int
  405. # bool
  406. # object
  407. lhs = Series(["a", 1, 2.0])
  408. if engine == "numexpr":
  409. with pytest.raises(ValueError, match="unknown type object"):
  410. pd.eval(expr, engine=engine, parser=parser)
  411. else:
  412. msg = "bad operand type for unary ~: 'str'"
  413. with pytest.raises(TypeError, match=msg):
  414. pd.eval(expr, engine=engine, parser=parser)
  415. def test_frame_negate(self, engine, parser):
  416. expr = "-lhs"
  417. # float
  418. lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)))
  419. expect = -lhs
  420. result = pd.eval(expr, engine=engine, parser=parser)
  421. tm.assert_frame_equal(expect, result)
  422. # int
  423. lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2)))
  424. expect = -lhs
  425. result = pd.eval(expr, engine=engine, parser=parser)
  426. tm.assert_frame_equal(expect, result)
  427. # bool doesn't work with numexpr but works elsewhere
  428. lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5)
  429. if engine == "numexpr":
  430. msg = "couldn't find matching opcode for 'neg_bb'"
  431. with pytest.raises(NotImplementedError, match=msg):
  432. pd.eval(expr, engine=engine, parser=parser)
  433. else:
  434. expect = -lhs
  435. result = pd.eval(expr, engine=engine, parser=parser)
  436. tm.assert_frame_equal(expect, result)
  437. def test_series_negate(self, engine, parser):
  438. expr = "-lhs"
  439. # float
  440. lhs = Series(np.random.default_rng(2).standard_normal(5))
  441. expect = -lhs
  442. result = pd.eval(expr, engine=engine, parser=parser)
  443. tm.assert_series_equal(expect, result)
  444. # int
  445. lhs = Series(np.random.default_rng(2).integers(5, size=5))
  446. expect = -lhs
  447. result = pd.eval(expr, engine=engine, parser=parser)
  448. tm.assert_series_equal(expect, result)
  449. # bool doesn't work with numexpr but works elsewhere
  450. lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5)
  451. if engine == "numexpr":
  452. msg = "couldn't find matching opcode for 'neg_bb'"
  453. with pytest.raises(NotImplementedError, match=msg):
  454. pd.eval(expr, engine=engine, parser=parser)
  455. else:
  456. expect = -lhs
  457. result = pd.eval(expr, engine=engine, parser=parser)
  458. tm.assert_series_equal(expect, result)
  459. @pytest.mark.parametrize(
  460. "lhs",
  461. [
  462. # Float
  463. DataFrame(np.random.default_rng(2).standard_normal((5, 2))),
  464. # Int
  465. DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))),
  466. # bool doesn't work with numexpr but works elsewhere
  467. DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5),
  468. ],
  469. )
  470. def test_frame_pos(self, lhs, engine, parser):
  471. expr = "+lhs"
  472. expect = lhs
  473. result = pd.eval(expr, engine=engine, parser=parser)
  474. tm.assert_frame_equal(expect, result)
  475. @pytest.mark.parametrize(
  476. "lhs",
  477. [
  478. # Float
  479. Series(np.random.default_rng(2).standard_normal(5)),
  480. # Int
  481. Series(np.random.default_rng(2).integers(5, size=5)),
  482. # bool doesn't work with numexpr but works elsewhere
  483. Series(np.random.default_rng(2).standard_normal(5) > 0.5),
  484. ],
  485. )
  486. def test_series_pos(self, lhs, engine, parser):
  487. expr = "+lhs"
  488. expect = lhs
  489. result = pd.eval(expr, engine=engine, parser=parser)
  490. tm.assert_series_equal(expect, result)
  491. def test_scalar_unary(self, engine, parser):
  492. msg = "bad operand type for unary ~: 'float'"
  493. warn = None
  494. if PY312 and not (engine == "numexpr" and parser == "pandas"):
  495. warn = DeprecationWarning
  496. with pytest.raises(TypeError, match=msg):
  497. pd.eval("~1.0", engine=engine, parser=parser)
  498. assert pd.eval("-1.0", parser=parser, engine=engine) == -1.0
  499. assert pd.eval("+1.0", parser=parser, engine=engine) == +1.0
  500. assert pd.eval("~1", parser=parser, engine=engine) == ~1
  501. assert pd.eval("-1", parser=parser, engine=engine) == -1
  502. assert pd.eval("+1", parser=parser, engine=engine) == +1
  503. with tm.assert_produces_warning(
  504. warn, match="Bitwise inversion", check_stacklevel=False
  505. ):
  506. assert pd.eval("~True", parser=parser, engine=engine) == ~True
  507. with tm.assert_produces_warning(
  508. warn, match="Bitwise inversion", check_stacklevel=False
  509. ):
  510. assert pd.eval("~False", parser=parser, engine=engine) == ~False
  511. assert pd.eval("-True", parser=parser, engine=engine) == -True
  512. assert pd.eval("-False", parser=parser, engine=engine) == -False
  513. assert pd.eval("+True", parser=parser, engine=engine) == +True
  514. assert pd.eval("+False", parser=parser, engine=engine) == +False
  515. def test_unary_in_array(self):
  516. # GH 11235
  517. # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI
  518. # but cannot reproduce locally
  519. result = np.array(
  520. pd.eval("[-True, True, +True, -False, False, +False, -37, 37, ~37, +37]"),
  521. dtype=np.object_,
  522. )
  523. expected = np.array(
  524. [
  525. -True,
  526. True,
  527. +True,
  528. -False,
  529. False,
  530. +False,
  531. -37,
  532. 37,
  533. ~37,
  534. +37,
  535. ],
  536. dtype=np.object_,
  537. )
  538. tm.assert_numpy_array_equal(result, expected)
  539. @pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"])
  540. def test_float_comparison_bin_op(self, float_numpy_dtype, expr):
  541. # GH 16363
  542. df = DataFrame({"x": np.array([0], dtype=float_numpy_dtype)})
  543. res = df.eval(expr)
  544. assert res.values == np.array([False])
  545. def test_unary_in_function(self):
  546. # GH 46471
  547. df = DataFrame({"x": [0, 1, np.nan]})
  548. result = df.eval("x.fillna(-1)")
  549. expected = df.x.fillna(-1)
  550. # column name becomes None if using numexpr
  551. # only check names when the engine is not numexpr
  552. tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR)
  553. result = df.eval("x.shift(1, fill_value=-1)")
  554. expected = df.x.shift(1, fill_value=-1)
  555. tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR)
  556. @pytest.mark.parametrize(
  557. "ex",
  558. (
  559. "1 or 2",
  560. "1 and 2",
  561. "a and b",
  562. "a or b",
  563. "1 or 2 and (3 + 2) > 3",
  564. "2 * x > 2 or 1 and 2",
  565. "2 * df > 3 and 1 or a",
  566. ),
  567. )
  568. def test_disallow_scalar_bool_ops(self, ex, engine, parser):
  569. x, a, b = np.random.default_rng(2).standard_normal(3), 1, 2 # noqa: F841
  570. df = DataFrame(np.random.default_rng(2).standard_normal((3, 2))) # noqa: F841
  571. msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
  572. with pytest.raises(NotImplementedError, match=msg):
  573. pd.eval(ex, engine=engine, parser=parser)
  574. def test_identical(self, engine, parser):
  575. # see gh-10546
  576. x = 1
  577. result = pd.eval("x", engine=engine, parser=parser)
  578. assert result == 1
  579. assert is_scalar(result)
  580. x = 1.5
  581. result = pd.eval("x", engine=engine, parser=parser)
  582. assert result == 1.5
  583. assert is_scalar(result)
  584. x = False
  585. result = pd.eval("x", engine=engine, parser=parser)
  586. assert not result
  587. assert is_bool(result)
  588. assert is_scalar(result)
  589. x = np.array([1])
  590. result = pd.eval("x", engine=engine, parser=parser)
  591. tm.assert_numpy_array_equal(result, np.array([1]))
  592. assert result.shape == (1,)
  593. x = np.array([1.5])
  594. result = pd.eval("x", engine=engine, parser=parser)
  595. tm.assert_numpy_array_equal(result, np.array([1.5]))
  596. assert result.shape == (1,)
  597. x = np.array([False]) # noqa: F841
  598. result = pd.eval("x", engine=engine, parser=parser)
  599. tm.assert_numpy_array_equal(result, np.array([False]))
  600. assert result.shape == (1,)
  601. def test_line_continuation(self, engine, parser):
  602. # GH 11149
  603. exp = """1 + 2 * \
  604. 5 - 1 + 2 """
  605. result = pd.eval(exp, engine=engine, parser=parser)
  606. assert result == 12
  607. def test_float_truncation(self, engine, parser):
  608. # GH 14241
  609. exp = "1000000000.006"
  610. result = pd.eval(exp, engine=engine, parser=parser)
  611. expected = np.float64(exp)
  612. assert result == expected
  613. df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})
  614. cutoff = 1000000000.0006
  615. result = df.query(f"A < {cutoff:.4f}")
  616. assert result.empty
  617. cutoff = 1000000000.0010
  618. result = df.query(f"A > {cutoff:.4f}")
  619. expected = df.loc[[1, 2], :]
  620. tm.assert_frame_equal(expected, result)
  621. exact = 1000000000.0011
  622. result = df.query(f"A == {exact:.4f}")
  623. expected = df.loc[[1], :]
  624. tm.assert_frame_equal(expected, result)
  625. def test_disallow_python_keywords(self):
  626. # GH 18221
  627. df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"])
  628. msg = "Python keyword not valid identifier in numexpr query"
  629. with pytest.raises(SyntaxError, match=msg):
  630. df.query("class == 0")
  631. df = DataFrame()
  632. df.index.name = "lambda"
  633. with pytest.raises(SyntaxError, match=msg):
  634. df.query("lambda == 0")
  635. def test_true_false_logic(self):
  636. # GH 25823
  637. # This behavior is deprecated in Python 3.12
  638. with tm.maybe_produces_warning(
  639. DeprecationWarning, PY312, check_stacklevel=False
  640. ):
  641. assert pd.eval("not True") == -2
  642. assert pd.eval("not False") == -1
  643. assert pd.eval("True and not True") == 0
  644. def test_and_logic_string_match(self):
  645. # GH 25823
  646. event = Series({"a": "hello"})
  647. assert pd.eval(f"{event.str.match('hello').a}")
  648. assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")
  649. # -------------------------------------
  650. # gh-12388: Typecasting rules consistency with python
  651. class TestTypeCasting:
  652. @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"])
  653. # maybe someday... numexpr has too many upcasting rules now
  654. # chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float']))
  655. @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")])
  656. def test_binop_typecasting(
  657. self, engine, parser, op, complex_or_float_dtype, left_right, request
  658. ):
  659. # GH#21374
  660. dtype = complex_or_float_dtype
  661. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype)
  662. left, right = left_right
  663. s = f"{left} {op} {right}"
  664. res = pd.eval(s, engine=engine, parser=parser)
  665. if dtype == "complex64" and engine == "numexpr":
  666. mark = pytest.mark.xfail(
  667. reason="numexpr issue with complex that are upcast "
  668. "to complex 128 "
  669. "https://github.com/pydata/numexpr/issues/492"
  670. )
  671. request.applymarker(mark)
  672. assert df.values.dtype == dtype
  673. assert res.values.dtype == dtype
  674. tm.assert_frame_equal(res, eval(s), check_exact=False)
  675. # -------------------------------------
  676. # Basic and complex alignment
  677. def should_warn(*args):
  678. not_mono = not any(map(operator.attrgetter("is_monotonic_increasing"), args))
  679. only_one_dt = reduce(
  680. operator.xor, (issubclass(x.dtype.type, np.datetime64) for x in args)
  681. )
  682. return not_mono and only_one_dt
  683. class TestAlignment:
  684. index_types = ["i", "s", "dt"]
  685. lhs_index_types = index_types + ["s"] # 'p'
  686. def test_align_nested_unary_op(self, engine, parser):
  687. s = "df * ~2"
  688. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  689. res = pd.eval(s, engine=engine, parser=parser)
  690. tm.assert_frame_equal(res, df * ~2)
  691. @pytest.mark.filterwarnings("always::RuntimeWarning")
  692. @pytest.mark.parametrize("lr_idx_type", lhs_index_types)
  693. @pytest.mark.parametrize("rr_idx_type", index_types)
  694. @pytest.mark.parametrize("c_idx_type", index_types)
  695. def test_basic_frame_alignment(
  696. self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type, idx_func_dict
  697. ):
  698. df = DataFrame(
  699. np.random.default_rng(2).standard_normal((10, 10)),
  700. index=idx_func_dict[lr_idx_type](10),
  701. columns=idx_func_dict[c_idx_type](10),
  702. )
  703. df2 = DataFrame(
  704. np.random.default_rng(2).standard_normal((20, 10)),
  705. index=idx_func_dict[rr_idx_type](20),
  706. columns=idx_func_dict[c_idx_type](10),
  707. )
  708. # only warns if not monotonic and not sortable
  709. if should_warn(df.index, df2.index):
  710. with tm.assert_produces_warning(RuntimeWarning):
  711. res = pd.eval("df + df2", engine=engine, parser=parser)
  712. else:
  713. res = pd.eval("df + df2", engine=engine, parser=parser)
  714. tm.assert_frame_equal(res, df + df2)
  715. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  716. @pytest.mark.parametrize("c_idx_type", lhs_index_types)
  717. def test_frame_comparison(
  718. self, engine, parser, r_idx_type, c_idx_type, idx_func_dict
  719. ):
  720. df = DataFrame(
  721. np.random.default_rng(2).standard_normal((10, 10)),
  722. index=idx_func_dict[r_idx_type](10),
  723. columns=idx_func_dict[c_idx_type](10),
  724. )
  725. res = pd.eval("df < 2", engine=engine, parser=parser)
  726. tm.assert_frame_equal(res, df < 2)
  727. df3 = DataFrame(
  728. np.random.default_rng(2).standard_normal(df.shape),
  729. index=df.index,
  730. columns=df.columns,
  731. )
  732. res = pd.eval("df < df3", engine=engine, parser=parser)
  733. tm.assert_frame_equal(res, df < df3)
  734. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  735. @pytest.mark.parametrize("r1", lhs_index_types)
  736. @pytest.mark.parametrize("c1", index_types)
  737. @pytest.mark.parametrize("r2", index_types)
  738. @pytest.mark.parametrize("c2", index_types)
  739. def test_medium_complex_frame_alignment(
  740. self, engine, parser, r1, c1, r2, c2, idx_func_dict
  741. ):
  742. df = DataFrame(
  743. np.random.default_rng(2).standard_normal((3, 2)),
  744. index=idx_func_dict[r1](3),
  745. columns=idx_func_dict[c1](2),
  746. )
  747. df2 = DataFrame(
  748. np.random.default_rng(2).standard_normal((4, 2)),
  749. index=idx_func_dict[r2](4),
  750. columns=idx_func_dict[c2](2),
  751. )
  752. df3 = DataFrame(
  753. np.random.default_rng(2).standard_normal((5, 2)),
  754. index=idx_func_dict[r2](5),
  755. columns=idx_func_dict[c2](2),
  756. )
  757. if should_warn(df.index, df2.index, df3.index):
  758. with tm.assert_produces_warning(RuntimeWarning):
  759. res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
  760. else:
  761. res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
  762. tm.assert_frame_equal(res, df + df2 + df3)
  763. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  764. @pytest.mark.parametrize("index_name", ["index", "columns"])
  765. @pytest.mark.parametrize("c_idx_type", index_types)
  766. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  767. def test_basic_frame_series_alignment(
  768. self, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict
  769. ):
  770. df = DataFrame(
  771. np.random.default_rng(2).standard_normal((10, 10)),
  772. index=idx_func_dict[r_idx_type](10),
  773. columns=idx_func_dict[c_idx_type](10),
  774. )
  775. index = getattr(df, index_name)
  776. s = Series(np.random.default_rng(2).standard_normal(5), index[:5])
  777. if should_warn(df.index, s.index):
  778. with tm.assert_produces_warning(RuntimeWarning):
  779. res = pd.eval("df + s", engine=engine, parser=parser)
  780. else:
  781. res = pd.eval("df + s", engine=engine, parser=parser)
  782. if r_idx_type == "dt" or c_idx_type == "dt":
  783. expected = df.add(s) if engine == "numexpr" else df + s
  784. else:
  785. expected = df + s
  786. tm.assert_frame_equal(res, expected)
  787. @pytest.mark.parametrize("index_name", ["index", "columns"])
  788. @pytest.mark.parametrize(
  789. "r_idx_type, c_idx_type",
  790. list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")],
  791. )
  792. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  793. def test_basic_series_frame_alignment(
  794. self, request, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict
  795. ):
  796. if (
  797. engine == "numexpr"
  798. and parser in ("pandas", "python")
  799. and index_name == "index"
  800. and r_idx_type == "i"
  801. and c_idx_type == "s"
  802. ):
  803. reason = (
  804. f"Flaky column ordering when engine={engine}, "
  805. f"parser={parser}, index_name={index_name}, "
  806. f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}"
  807. )
  808. request.applymarker(pytest.mark.xfail(reason=reason, strict=False))
  809. df = DataFrame(
  810. np.random.default_rng(2).standard_normal((10, 7)),
  811. index=idx_func_dict[r_idx_type](10),
  812. columns=idx_func_dict[c_idx_type](7),
  813. )
  814. index = getattr(df, index_name)
  815. s = Series(np.random.default_rng(2).standard_normal(5), index[:5])
  816. if should_warn(s.index, df.index):
  817. with tm.assert_produces_warning(RuntimeWarning):
  818. res = pd.eval("s + df", engine=engine, parser=parser)
  819. else:
  820. res = pd.eval("s + df", engine=engine, parser=parser)
  821. if r_idx_type == "dt" or c_idx_type == "dt":
  822. expected = df.add(s) if engine == "numexpr" else s + df
  823. else:
  824. expected = s + df
  825. tm.assert_frame_equal(res, expected)
  826. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  827. @pytest.mark.parametrize("c_idx_type", index_types)
  828. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  829. @pytest.mark.parametrize("index_name", ["index", "columns"])
  830. @pytest.mark.parametrize("op", ["+", "*"])
  831. def test_series_frame_commutativity(
  832. self, engine, parser, index_name, op, r_idx_type, c_idx_type, idx_func_dict
  833. ):
  834. df = DataFrame(
  835. np.random.default_rng(2).standard_normal((10, 10)),
  836. index=idx_func_dict[r_idx_type](10),
  837. columns=idx_func_dict[c_idx_type](10),
  838. )
  839. index = getattr(df, index_name)
  840. s = Series(np.random.default_rng(2).standard_normal(5), index[:5])
  841. lhs = f"s {op} df"
  842. rhs = f"df {op} s"
  843. if should_warn(df.index, s.index):
  844. with tm.assert_produces_warning(RuntimeWarning):
  845. a = pd.eval(lhs, engine=engine, parser=parser)
  846. with tm.assert_produces_warning(RuntimeWarning):
  847. b = pd.eval(rhs, engine=engine, parser=parser)
  848. else:
  849. a = pd.eval(lhs, engine=engine, parser=parser)
  850. b = pd.eval(rhs, engine=engine, parser=parser)
  851. if r_idx_type != "dt" and c_idx_type != "dt":
  852. if engine == "numexpr":
  853. tm.assert_frame_equal(a, b)
  854. @pytest.mark.filterwarnings("always::RuntimeWarning")
  855. @pytest.mark.parametrize("r1", lhs_index_types)
  856. @pytest.mark.parametrize("c1", index_types)
  857. @pytest.mark.parametrize("r2", index_types)
  858. @pytest.mark.parametrize("c2", index_types)
  859. def test_complex_series_frame_alignment(
  860. self, engine, parser, r1, c1, r2, c2, idx_func_dict
  861. ):
  862. n = 3
  863. m1 = 5
  864. m2 = 2 * m1
  865. df = DataFrame(
  866. np.random.default_rng(2).standard_normal((m1, n)),
  867. index=idx_func_dict[r1](m1),
  868. columns=idx_func_dict[c1](n),
  869. )
  870. df2 = DataFrame(
  871. np.random.default_rng(2).standard_normal((m2, n)),
  872. index=idx_func_dict[r2](m2),
  873. columns=idx_func_dict[c2](n),
  874. )
  875. index = df2.columns
  876. ser = Series(np.random.default_rng(2).standard_normal(n), index[:n])
  877. if r2 == "dt" or c2 == "dt":
  878. if engine == "numexpr":
  879. expected2 = df2.add(ser)
  880. else:
  881. expected2 = df2 + ser
  882. else:
  883. expected2 = df2 + ser
  884. if r1 == "dt" or c1 == "dt":
  885. if engine == "numexpr":
  886. expected = expected2.add(df)
  887. else:
  888. expected = expected2 + df
  889. else:
  890. expected = expected2 + df
  891. if should_warn(df2.index, ser.index, df.index):
  892. with tm.assert_produces_warning(RuntimeWarning):
  893. res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
  894. else:
  895. res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
  896. assert res.shape == expected.shape
  897. tm.assert_frame_equal(res, expected)
  898. def test_performance_warning_for_poor_alignment(self, engine, parser):
  899. df = DataFrame(np.random.default_rng(2).standard_normal((1000, 10)))
  900. s = Series(np.random.default_rng(2).standard_normal(10000))
  901. if engine == "numexpr":
  902. seen = PerformanceWarning
  903. else:
  904. seen = False
  905. with tm.assert_produces_warning(seen):
  906. pd.eval("df + s", engine=engine, parser=parser)
  907. s = Series(np.random.default_rng(2).standard_normal(1000))
  908. with tm.assert_produces_warning(False):
  909. pd.eval("df + s", engine=engine, parser=parser)
  910. df = DataFrame(np.random.default_rng(2).standard_normal((10, 10000)))
  911. s = Series(np.random.default_rng(2).standard_normal(10000))
  912. with tm.assert_produces_warning(False):
  913. pd.eval("df + s", engine=engine, parser=parser)
  914. df = DataFrame(np.random.default_rng(2).standard_normal((10, 10)))
  915. s = Series(np.random.default_rng(2).standard_normal(10000))
  916. is_python_engine = engine == "python"
  917. if not is_python_engine:
  918. wrn = PerformanceWarning
  919. else:
  920. wrn = False
  921. with tm.assert_produces_warning(wrn) as w:
  922. pd.eval("df + s", engine=engine, parser=parser)
  923. if not is_python_engine:
  924. assert len(w) == 1
  925. msg = str(w[0].message)
  926. logged = np.log10(s.size - df.shape[1])
  927. expected = (
  928. f"Alignment difference on axis 1 is larger "
  929. f"than an order of magnitude on term 'df', "
  930. f"by more than {logged:.4g}; performance may suffer."
  931. )
  932. assert msg == expected
  933. # ------------------------------------
  934. # Slightly more complex ops
  935. class TestOperations:
  936. def eval(self, *args, **kwargs):
  937. kwargs["level"] = kwargs.pop("level", 0) + 1
  938. return pd.eval(*args, **kwargs)
  939. def test_simple_arith_ops(self, engine, parser):
  940. exclude_arith = []
  941. if parser == "python":
  942. exclude_arith = ["in", "not in"]
  943. arith_ops = [
  944. op
  945. for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS
  946. if op not in exclude_arith
  947. ]
  948. ops = (op for op in arith_ops if op != "//")
  949. for op in ops:
  950. ex = f"1 {op} 1"
  951. ex2 = f"x {op} 1"
  952. ex3 = f"1 {op} (x + 1)"
  953. if op in ("in", "not in"):
  954. msg = "argument of type 'int' is not iterable"
  955. with pytest.raises(TypeError, match=msg):
  956. pd.eval(ex, engine=engine, parser=parser)
  957. else:
  958. expec = _eval_single_bin(1, op, 1, engine)
  959. x = self.eval(ex, engine=engine, parser=parser)
  960. assert x == expec
  961. expec = _eval_single_bin(x, op, 1, engine)
  962. y = self.eval(ex2, local_dict={"x": x}, engine=engine, parser=parser)
  963. assert y == expec
  964. expec = _eval_single_bin(1, op, x + 1, engine)
  965. y = self.eval(ex3, local_dict={"x": x}, engine=engine, parser=parser)
  966. assert y == expec
  967. @pytest.mark.parametrize("rhs", [True, False])
  968. @pytest.mark.parametrize("lhs", [True, False])
  969. @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
  970. def test_simple_bool_ops(self, rhs, lhs, op):
  971. ex = f"{lhs} {op} {rhs}"
  972. if parser == "python" and op in ["and", "or"]:
  973. msg = "'BoolOp' nodes are not implemented"
  974. with pytest.raises(NotImplementedError, match=msg):
  975. self.eval(ex)
  976. return
  977. res = self.eval(ex)
  978. exp = eval(ex)
  979. assert res == exp
  980. @pytest.mark.parametrize("rhs", [True, False])
  981. @pytest.mark.parametrize("lhs", [True, False])
  982. @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
  983. def test_bool_ops_with_constants(self, rhs, lhs, op):
  984. ex = f"{lhs} {op} {rhs}"
  985. if parser == "python" and op in ["and", "or"]:
  986. msg = "'BoolOp' nodes are not implemented"
  987. with pytest.raises(NotImplementedError, match=msg):
  988. self.eval(ex)
  989. return
  990. res = self.eval(ex)
  991. exp = eval(ex)
  992. assert res == exp
  993. def test_4d_ndarray_fails(self):
  994. x = np.random.default_rng(2).standard_normal((3, 4, 5, 6))
  995. y = Series(np.random.default_rng(2).standard_normal(10))
  996. msg = "N-dimensional objects, where N > 2, are not supported with eval"
  997. with pytest.raises(NotImplementedError, match=msg):
  998. self.eval("x + y", local_dict={"x": x, "y": y})
  999. def test_constant(self):
  1000. x = self.eval("1")
  1001. assert x == 1
  1002. def test_single_variable(self):
  1003. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  1004. df2 = self.eval("df", local_dict={"df": df})
  1005. tm.assert_frame_equal(df, df2)
  1006. def test_failing_subscript_with_name_error(self):
  1007. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # noqa: F841
  1008. with pytest.raises(NameError, match="name 'x' is not defined"):
  1009. self.eval("df[x > 2] > 2")
  1010. def test_lhs_expression_subscript(self):
  1011. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  1012. result = self.eval("(df + 1)[df > 2]", local_dict={"df": df})
  1013. expected = (df + 1)[df > 2]
  1014. tm.assert_frame_equal(result, expected)
  1015. def test_attr_expression(self):
  1016. df = DataFrame(
  1017. np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc")
  1018. )
  1019. expr1 = "df.a < df.b"
  1020. expec1 = df.a < df.b
  1021. expr2 = "df.a + df.b + df.c"
  1022. expec2 = df.a + df.b + df.c
  1023. expr3 = "df.a + df.b + df.c[df.b < 0]"
  1024. expec3 = df.a + df.b + df.c[df.b < 0]
  1025. exprs = expr1, expr2, expr3
  1026. expecs = expec1, expec2, expec3
  1027. for e, expec in zip(exprs, expecs):
  1028. tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df}))
  1029. def test_assignment_fails(self):
  1030. df = DataFrame(
  1031. np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc")
  1032. )
  1033. df2 = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  1034. expr1 = "df = df2"
  1035. msg = "cannot assign without a target object"
  1036. with pytest.raises(ValueError, match=msg):
  1037. self.eval(expr1, local_dict={"df": df, "df2": df2})
  1038. def test_assignment_column_multiple_raise(self):
  1039. df = DataFrame(
  1040. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1041. )
  1042. # multiple assignees
  1043. with pytest.raises(SyntaxError, match="invalid syntax"):
  1044. df.eval("d c = a + b")
  1045. def test_assignment_column_invalid_assign(self):
  1046. df = DataFrame(
  1047. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1048. )
  1049. # invalid assignees
  1050. msg = "left hand side of an assignment must be a single name"
  1051. with pytest.raises(SyntaxError, match=msg):
  1052. df.eval("d,c = a + b")
  1053. def test_assignment_column_invalid_assign_function_call(self):
  1054. df = DataFrame(
  1055. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1056. )
  1057. msg = "cannot assign to function call"
  1058. with pytest.raises(SyntaxError, match=msg):
  1059. df.eval('Timestamp("20131001") = a + b')
  1060. def test_assignment_single_assign_existing(self):
  1061. df = DataFrame(
  1062. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1063. )
  1064. # single assignment - existing variable
  1065. expected = df.copy()
  1066. expected["a"] = expected["a"] + expected["b"]
  1067. df.eval("a = a + b", inplace=True)
  1068. tm.assert_frame_equal(df, expected)
  1069. def test_assignment_single_assign_new(self):
  1070. df = DataFrame(
  1071. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1072. )
  1073. # single assignment - new variable
  1074. expected = df.copy()
  1075. expected["c"] = expected["a"] + expected["b"]
  1076. df.eval("c = a + b", inplace=True)
  1077. tm.assert_frame_equal(df, expected)
  1078. def test_assignment_single_assign_local_overlap(self):
  1079. df = DataFrame(
  1080. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1081. )
  1082. df = df.copy()
  1083. a = 1 # noqa: F841
  1084. df.eval("a = 1 + b", inplace=True)
  1085. expected = df.copy()
  1086. expected["a"] = 1 + expected["b"]
  1087. tm.assert_frame_equal(df, expected)
  1088. def test_assignment_single_assign_name(self):
  1089. df = DataFrame(
  1090. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1091. )
  1092. a = 1 # noqa: F841
  1093. old_a = df.a.copy()
  1094. df.eval("a = a + b", inplace=True)
  1095. result = old_a + df.b
  1096. tm.assert_series_equal(result, df.a, check_names=False)
  1097. assert result.name is None
  1098. def test_assignment_multiple_raises(self):
  1099. df = DataFrame(
  1100. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1101. )
  1102. # multiple assignment
  1103. df.eval("c = a + b", inplace=True)
  1104. msg = "can only assign a single expression"
  1105. with pytest.raises(SyntaxError, match=msg):
  1106. df.eval("c = a = b")
  1107. def test_assignment_explicit(self):
  1108. df = DataFrame(
  1109. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1110. )
  1111. # explicit targets
  1112. self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True)
  1113. expected = df.copy()
  1114. expected["c"] = expected["a"] + expected["b"]
  1115. tm.assert_frame_equal(df, expected)
  1116. def test_column_in(self):
  1117. # GH 11235
  1118. df = DataFrame({"a": [11], "b": [-32]})
  1119. result = df.eval("a in [11, -32]")
  1120. expected = Series([True])
  1121. # TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
  1122. # but cannot reproduce locally
  1123. tm.assert_series_equal(result, expected, check_names=False)
  1124. @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
  1125. def test_assignment_not_inplace(self):
  1126. # see gh-9297
  1127. df = DataFrame(
  1128. np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab")
  1129. )
  1130. actual = df.eval("c = a + b", inplace=False)
  1131. assert actual is not None
  1132. expected = df.copy()
  1133. expected["c"] = expected["a"] + expected["b"]
  1134. tm.assert_frame_equal(df, expected)
  1135. def test_multi_line_expression(self, warn_copy_on_write):
  1136. # GH 11149
  1137. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1138. expected = df.copy()
  1139. expected["c"] = expected["a"] + expected["b"]
  1140. expected["d"] = expected["c"] + expected["b"]
  1141. answer = df.eval(
  1142. """
  1143. c = a + b
  1144. d = c + b""",
  1145. inplace=True,
  1146. )
  1147. tm.assert_frame_equal(expected, df)
  1148. assert answer is None
  1149. expected["a"] = expected["a"] - 1
  1150. expected["e"] = expected["a"] + 2
  1151. answer = df.eval(
  1152. """
  1153. a = a - 1
  1154. e = a + 2""",
  1155. inplace=True,
  1156. )
  1157. tm.assert_frame_equal(expected, df)
  1158. assert answer is None
  1159. # multi-line not valid if not all assignments
  1160. msg = "Multi-line expressions are only valid if all expressions contain"
  1161. with pytest.raises(ValueError, match=msg):
  1162. df.eval(
  1163. """
  1164. a = b + 2
  1165. b - 2""",
  1166. inplace=False,
  1167. )
  1168. def test_multi_line_expression_not_inplace(self):
  1169. # GH 11149
  1170. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1171. expected = df.copy()
  1172. expected["c"] = expected["a"] + expected["b"]
  1173. expected["d"] = expected["c"] + expected["b"]
  1174. df = df.eval(
  1175. """
  1176. c = a + b
  1177. d = c + b""",
  1178. inplace=False,
  1179. )
  1180. tm.assert_frame_equal(expected, df)
  1181. expected["a"] = expected["a"] - 1
  1182. expected["e"] = expected["a"] + 2
  1183. df = df.eval(
  1184. """
  1185. a = a - 1
  1186. e = a + 2""",
  1187. inplace=False,
  1188. )
  1189. tm.assert_frame_equal(expected, df)
  1190. def test_multi_line_expression_local_variable(self):
  1191. # GH 15342
  1192. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1193. expected = df.copy()
  1194. local_var = 7
  1195. expected["c"] = expected["a"] * local_var
  1196. expected["d"] = expected["c"] + local_var
  1197. answer = df.eval(
  1198. """
  1199. c = a * @local_var
  1200. d = c + @local_var
  1201. """,
  1202. inplace=True,
  1203. )
  1204. tm.assert_frame_equal(expected, df)
  1205. assert answer is None
  1206. def test_multi_line_expression_callable_local_variable(self):
  1207. # 26426
  1208. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1209. def local_func(a, b):
  1210. return b
  1211. expected = df.copy()
  1212. expected["c"] = expected["a"] * local_func(1, 7)
  1213. expected["d"] = expected["c"] + local_func(1, 7)
  1214. answer = df.eval(
  1215. """
  1216. c = a * @local_func(1, 7)
  1217. d = c + @local_func(1, 7)
  1218. """,
  1219. inplace=True,
  1220. )
  1221. tm.assert_frame_equal(expected, df)
  1222. assert answer is None
  1223. def test_multi_line_expression_callable_local_variable_with_kwargs(self):
  1224. # 26426
  1225. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1226. def local_func(a, b):
  1227. return b
  1228. expected = df.copy()
  1229. expected["c"] = expected["a"] * local_func(b=7, a=1)
  1230. expected["d"] = expected["c"] + local_func(b=7, a=1)
  1231. answer = df.eval(
  1232. """
  1233. c = a * @local_func(b=7, a=1)
  1234. d = c + @local_func(b=7, a=1)
  1235. """,
  1236. inplace=True,
  1237. )
  1238. tm.assert_frame_equal(expected, df)
  1239. assert answer is None
  1240. def test_assignment_in_query(self):
  1241. # GH 8664
  1242. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1243. df_orig = df.copy()
  1244. msg = "cannot assign without a target object"
  1245. with pytest.raises(ValueError, match=msg):
  1246. df.query("a = 1")
  1247. tm.assert_frame_equal(df, df_orig)
  1248. def test_query_inplace(self):
  1249. # see gh-11149
  1250. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1251. expected = df.copy()
  1252. expected = expected[expected["a"] == 2]
  1253. df.query("a == 2", inplace=True)
  1254. tm.assert_frame_equal(expected, df)
  1255. df = {}
  1256. expected = {"a": 3}
  1257. self.eval("a = 1 + 2", target=df, inplace=True)
  1258. tm.assert_dict_equal(df, expected)
  1259. @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)])
  1260. def test_cannot_item_assign(self, invalid_target):
  1261. msg = "Cannot assign expression output to target"
  1262. expression = "a = 1 + 2"
  1263. with pytest.raises(ValueError, match=msg):
  1264. self.eval(expression, target=invalid_target, inplace=True)
  1265. if hasattr(invalid_target, "copy"):
  1266. with pytest.raises(ValueError, match=msg):
  1267. self.eval(expression, target=invalid_target, inplace=False)
  1268. @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)])
  1269. def test_cannot_copy_item(self, invalid_target):
  1270. msg = "Cannot return a copy of the target"
  1271. expression = "a = 1 + 2"
  1272. with pytest.raises(ValueError, match=msg):
  1273. self.eval(expression, target=invalid_target, inplace=False)
  1274. @pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}])
  1275. def test_inplace_no_assignment(self, target):
  1276. expression = "1 + 2"
  1277. assert self.eval(expression, target=target, inplace=False) == 3
  1278. msg = "Cannot operate inplace if there is no assignment"
  1279. with pytest.raises(ValueError, match=msg):
  1280. self.eval(expression, target=target, inplace=True)
  1281. def test_basic_period_index_boolean_expression(self):
  1282. df = DataFrame(
  1283. np.random.default_rng(2).standard_normal((2, 2)),
  1284. columns=period_range("2020-01-01", freq="D", periods=2),
  1285. )
  1286. e = df < 2
  1287. r = self.eval("df < 2", local_dict={"df": df})
  1288. x = df < 2
  1289. tm.assert_frame_equal(r, e)
  1290. tm.assert_frame_equal(x, e)
  1291. def test_basic_period_index_subscript_expression(self):
  1292. df = DataFrame(
  1293. np.random.default_rng(2).standard_normal((2, 2)),
  1294. columns=period_range("2020-01-01", freq="D", periods=2),
  1295. )
  1296. r = self.eval("df[df < 2 + 3]", local_dict={"df": df})
  1297. e = df[df < 2 + 3]
  1298. tm.assert_frame_equal(r, e)
  1299. def test_nested_period_index_subscript_expression(self):
  1300. df = DataFrame(
  1301. np.random.default_rng(2).standard_normal((2, 2)),
  1302. columns=period_range("2020-01-01", freq="D", periods=2),
  1303. )
  1304. r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df})
  1305. e = df[df[df < 2] < 2] + df * 2
  1306. tm.assert_frame_equal(r, e)
  1307. def test_date_boolean(self, engine, parser):
  1308. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  1309. df["dates1"] = date_range("1/1/2012", periods=5)
  1310. res = self.eval(
  1311. "df.dates1 < 20130101",
  1312. local_dict={"df": df},
  1313. engine=engine,
  1314. parser=parser,
  1315. )
  1316. expec = df.dates1 < "20130101"
  1317. tm.assert_series_equal(res, expec, check_names=False)
  1318. def test_simple_in_ops(self, engine, parser):
  1319. if parser != "python":
  1320. res = pd.eval("1 in [1, 2]", engine=engine, parser=parser)
  1321. assert res
  1322. res = pd.eval("2 in (1, 2)", engine=engine, parser=parser)
  1323. assert res
  1324. res = pd.eval("3 in (1, 2)", engine=engine, parser=parser)
  1325. assert not res
  1326. res = pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
  1327. assert res
  1328. res = pd.eval("[3] not in (1, 2)", engine=engine, parser=parser)
  1329. assert res
  1330. res = pd.eval("[3] in ([3], 2)", engine=engine, parser=parser)
  1331. assert res
  1332. res = pd.eval("[[3]] in [[[3]], 2]", engine=engine, parser=parser)
  1333. assert res
  1334. res = pd.eval("(3,) in [(3,), 2]", engine=engine, parser=parser)
  1335. assert res
  1336. res = pd.eval("(3,) not in [(3,), 2]", engine=engine, parser=parser)
  1337. assert not res
  1338. res = pd.eval("[(3,)] in [[(3,)], 2]", engine=engine, parser=parser)
  1339. assert res
  1340. else:
  1341. msg = "'In' nodes are not implemented"
  1342. with pytest.raises(NotImplementedError, match=msg):
  1343. pd.eval("1 in [1, 2]", engine=engine, parser=parser)
  1344. with pytest.raises(NotImplementedError, match=msg):
  1345. pd.eval("2 in (1, 2)", engine=engine, parser=parser)
  1346. with pytest.raises(NotImplementedError, match=msg):
  1347. pd.eval("3 in (1, 2)", engine=engine, parser=parser)
  1348. with pytest.raises(NotImplementedError, match=msg):
  1349. pd.eval("[(3,)] in (1, 2, [(3,)])", engine=engine, parser=parser)
  1350. msg = "'NotIn' nodes are not implemented"
  1351. with pytest.raises(NotImplementedError, match=msg):
  1352. pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
  1353. with pytest.raises(NotImplementedError, match=msg):
  1354. pd.eval("[3] not in (1, 2, [[3]])", engine=engine, parser=parser)
  1355. def test_check_many_exprs(self, engine, parser):
  1356. a = 1 # noqa: F841
  1357. expr = " * ".join("a" * 33)
  1358. expected = 1
  1359. res = pd.eval(expr, engine=engine, parser=parser)
  1360. assert res == expected
  1361. @pytest.mark.parametrize(
  1362. "expr",
  1363. [
  1364. "df > 2 and df > 3",
  1365. "df > 2 or df > 3",
  1366. "not df > 2",
  1367. ],
  1368. )
  1369. def test_fails_and_or_not(self, expr, engine, parser):
  1370. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
  1371. if parser == "python":
  1372. msg = "'BoolOp' nodes are not implemented"
  1373. if "not" in expr:
  1374. msg = "'Not' nodes are not implemented"
  1375. with pytest.raises(NotImplementedError, match=msg):
  1376. pd.eval(
  1377. expr,
  1378. local_dict={"df": df},
  1379. parser=parser,
  1380. engine=engine,
  1381. )
  1382. else:
  1383. # smoke-test, should not raise
  1384. pd.eval(
  1385. expr,
  1386. local_dict={"df": df},
  1387. parser=parser,
  1388. engine=engine,
  1389. )
  1390. @pytest.mark.parametrize("char", ["|", "&"])
  1391. def test_fails_ampersand_pipe(self, char, engine, parser):
  1392. df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # noqa: F841
  1393. ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)"
  1394. if parser == "python":
  1395. msg = "cannot evaluate scalar only bool ops"
  1396. with pytest.raises(NotImplementedError, match=msg):
  1397. pd.eval(ex, parser=parser, engine=engine)
  1398. else:
  1399. # smoke-test, should not raise
  1400. pd.eval(ex, parser=parser, engine=engine)
  class TestMath:
      """Tests for math function calls (``sin``, ``arctan2``, ...) in eval expressions."""

      @staticmethod
      def _two_column_frame():
          """Return a 10-row frame with distinct random float columns ``a`` and ``b``.

          Both columns are drawn from a single seeded generator. Seeding a
          fresh generator for each column would yield two identical columns,
          which would hide mistakes in how two-argument expressions bind their
          operands.
          """
          rng = np.random.default_rng(2)
          return DataFrame(
              {
                  "a": rng.standard_normal(10),
                  "b": rng.standard_normal(10),
              }
          )

      def eval(self, *args, **kwargs):
          """Call ``pd.eval`` with the ``level`` keyword increased by one.

          All other positional and keyword arguments are forwarded unchanged.
          The increment accounts for this wrapper's own stack frame.
          """
          kwargs["level"] = kwargs.pop("level", 0) + 1
          return pd.eval(*args, **kwargs)

      @pytest.mark.skipif(
          not NUMEXPR_INSTALLED, reason="Unary ops only implemented for numexpr"
      )
      @pytest.mark.parametrize("fn", _unary_math_ops)
      def test_unary_functions(self, fn):
          """``fn(a)`` evaluated through eval matches ``np.<fn>(a)``."""
          df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
          # ``a`` is referenced by name inside the expression string.
          a = df.a

          expr = f"{fn}(a)"
          got = self.eval(expr)
          # Suppress NumPy floating-point warnings while computing the reference.
          with np.errstate(all="ignore"):
              expect = getattr(np, fn)(a)
          tm.assert_series_equal(got, expect, check_names=False)

      @pytest.mark.parametrize("fn", _binary_math_ops)
      def test_binary_functions(self, fn):
          """``fn(a, b)`` evaluated through eval matches ``np.<fn>(a, b)``."""
          df = self._two_column_frame()
          # ``a`` and ``b`` are referenced by name inside the expression string.
          a = df.a
          b = df.b

          expr = f"{fn}(a, b)"
          got = self.eval(expr)
          # Suppress NumPy floating-point warnings while computing the reference.
          with np.errstate(all="ignore"):
              expect = getattr(np, fn)(a, b)
          tm.assert_almost_equal(got, expect, check_names=False)

      def test_df_use_case(self, engine, parser):
          """Nested function calls can be assigned to a new column in place."""
          df = self._two_column_frame()
          df.eval(
              "e = arctan2(sin(a), b)",
              engine=engine,
              parser=parser,
              inplace=True,
          )
          got = df.e
          expect = np.arctan2(np.sin(df.a), df.b)
          tm.assert_series_equal(got, expect, check_names=False)

      def test_df_arithmetic_subexpression(self, engine, parser):
          """A function call may take an arithmetic sub-expression as argument."""
          df = self._two_column_frame()
          df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
          got = df.e
          expect = np.sin(df.a + df.b)
          tm.assert_series_equal(got, expect, check_names=False)

      @pytest.mark.parametrize(
          "dtype, expect_dtype",
          [
              (np.int32, np.float64),
              (np.int64, np.float64),
              (np.float32, np.float32),
              (np.float64, np.float64),
              pytest.param(np.complex128, np.complex128, marks=td.skip_if_windows),
          ],
      )
      def test_result_types(self, dtype, expect_dtype, engine, parser):
          """``sin(a)`` yields the same result dtype as ``np.sin`` for each input dtype."""
          # xref https://github.com/pandas-dev/pandas/issues/12293
          #  this fails on Windows, apparently a floating point precision issue

          # Did not test complex64 because DataFrame is converting it to
          # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952
          df = DataFrame(
              {"a": np.random.default_rng(2).standard_normal(10).astype(dtype)}
          )
          assert df.a.dtype == dtype
          df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
          got = df.b
          expect = np.sin(df.a)
          assert expect.dtype == got.dtype
          assert expect_dtype == got.dtype
          tm.assert_series_equal(got, expect, check_names=False)

      def test_undefined_func(self, engine, parser):
          """Calling an unknown function name raises ``ValueError``."""
          df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
          msg = '"mysin" is not a supported function'

          with pytest.raises(ValueError, match=msg):
              df.eval("mysin(a)", engine=engine, parser=parser)

      def test_keyword_arg(self, engine, parser):
          """Passing a keyword argument to a math function raises ``TypeError``."""
          df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
          msg = 'Function "sin" does not support keyword arguments'

          with pytest.raises(TypeError, match=msg):
              df.eval("sin(x=a)", engine=engine, parser=parser)
  # Module-level array referenced by name from expression strings in TestScope.
  _var_s = np.random.default_rng(seed=2).standard_normal(size=10)
  class TestScope:
      """Tests for which names an eval expression can see, and what it leaves behind."""

      def test_global_scope(self, engine, parser):
          """A module-level name can be used inside the expression."""
          result = pd.eval("_var_s * 2", engine=engine, parser=parser)
          tm.assert_numpy_array_equal(_var_s * 2, result)

      def test_no_new_locals(self, engine, parser):
          """Evaluating with an explicit ``local_dict`` adds no local names."""
          x = 1
          before = dict(locals())
          pd.eval("x + 1", local_dict=before, engine=engine, parser=parser)
          after = dict(locals())
          # ``before`` itself became a local after the first snapshot was taken.
          del after["before"]
          assert before == after

      def test_no_new_globals(self, engine, parser):
          """Evaluating an expression leaves the module globals unchanged."""
          x = 1  # noqa: F841
          before = dict(globals())
          pd.eval("x + 1", engine=engine, parser=parser)
          after = dict(globals())
          assert before == after

      def test_empty_locals(self, engine, parser):
          """With ``local_dict={}`` the local ``x`` is reported as undefined."""
          # GH 47084
          x = 1  # noqa: F841
          with pytest.raises(UndefinedVariableError, match="name 'x' is not defined"):
              pd.eval("x + 1", engine=engine, parser=parser, local_dict={})

      def test_empty_globals(self, engine, parser):
          """With ``global_dict={}`` the module-level ``_var_s`` is reported as undefined."""
          # GH 47084
          with pytest.raises(
              UndefinedVariableError, match="name '_var_s' is not defined"
          ):
              pd.eval("_var_s * 2", engine=engine, parser=parser, global_dict={})
  @td.skip_if_no("numexpr")
  def test_invalid_engine():
      """An unrecognised ``engine`` name is rejected with ``KeyError``."""
      operands = {"x": 1, "y": 2}
      with pytest.raises(KeyError, match="Invalid engine 'asdf' passed"):
          pd.eval("x + y", local_dict=operands, engine="asdf")
  @td.skip_if_no("numexpr")
  @pytest.mark.parametrize(
      ("use_numexpr", "expected"),
      [(True, "numexpr"), (False, "python")],
  )
  def test_numexpr_option_respected(use_numexpr, expected):
      """``_check_engine(None)`` follows the ``compute.use_numexpr`` option."""
      # GH 32556
      from pandas.core.computation.eval import _check_engine

      with pd.option_context("compute.use_numexpr", use_numexpr):
          assert _check_engine(None) == expected
  @td.skip_if_no("numexpr")
  def test_numexpr_option_incompatible_op():
      """``query`` with a method call works while ``compute.use_numexpr`` is off."""
      # GH 32556
      with pd.option_context("compute.use_numexpr", False):
          frame = DataFrame(
              {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
          )
          # Only the last two rows have a missing value in column "A".
          wanted = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
          selected = frame.query("A.isnull()")
          tm.assert_frame_equal(selected, wanted)
  @td.skip_if_no("numexpr")
  def test_invalid_parser():
      """An unrecognised ``parser`` name is rejected with ``KeyError``."""
      operands = {"x": 1, "y": 2}
      with pytest.raises(KeyError, match="Invalid parser 'asdf' passed"):
          pd.eval("x + y", local_dict=operands, parser="asdf")
  # Parser name -> expression visitor class, used to parametrize
  # test_disallowed_nodes.
  _parsers: dict[str, type[BaseExprVisitor]] = dict(
      python=PythonExprVisitor,
      pytables=pytables.PyTablesExprVisitor,
      pandas=PandasExprVisitor,
  )
  @pytest.mark.parametrize("engine", ENGINES)
  @pytest.mark.parametrize("parser", _parsers)
  def test_disallowed_nodes(engine, parser):
      """Every entry in a visitor's ``unsupported_nodes`` raises when called."""
      visitor_cls = _parsers[parser]
      visitor = visitor_cls("x + 1", engine, parser)
      expected_msg = "nodes are not implemented"

      for node_name in visitor_cls.unsupported_nodes:
          handler = getattr(visitor, node_name)
          with pytest.raises(NotImplementedError, match=expected_msg):
              handler()
  def test_syntax_error_exprs(engine, parser):
      """An incomplete expression raises ``SyntaxError``."""
      incomplete = "s +"
      with pytest.raises(SyntaxError, match="invalid syntax"):
          pd.eval(incomplete, engine=engine, parser=parser)
  def test_name_error_exprs(engine, parser):
      """Using names that are not defined raises ``NameError``."""
      # Neither name used in the expression is defined in this function.
      with pytest.raises(NameError, match="name 's' is not defined"):
          pd.eval("s + t", engine=engine, parser=parser)
  @pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"])
  def test_invalid_local_variable_reference(engine, parser, express):
      """A ``@`` prefix in a top-level eval raises ``SyntaxError`` for every parser.

      Only the start of the error message differs between the "pandas" parser
      and the others.
      """
      a, b = 1, 2  # noqa: F841

      if parser == "pandas":
          expected_msg = "The '@' prefix is not"
      else:
          expected_msg = "The '@' prefix is only"

      with pytest.raises(SyntaxError, match=expected_msg):
          pd.eval(express, engine=engine, parser=parser)
  def test_numexpr_builtin_raises(engine, parser):
      """A local named ``sin`` raises under "numexpr" and works under other engines."""
      # Both locals are referenced by name inside the expression string.
      sin, dotted_line = 1, 2
      expression = "sin + dotted_line"

      if engine != "numexpr":
          res = pd.eval(expression, engine=engine, parser=parser)
          assert res == sin + dotted_line
          return

      with pytest.raises(NumExprClobberingError, match="Variables in expression .+"):
          pd.eval(expression, engine=engine, parser=parser)
  def test_bad_resolver_raises(engine, parser):
      """Resolvers given as an ``(int, float)`` tuple are rejected with ``TypeError``."""
      with pytest.raises(TypeError, match="Resolver of type .+"):
          pd.eval("1 + 2", resolvers=(42, 3.0), engine=engine, parser=parser)
  def test_empty_string_raises(engine, parser):
      """An empty expression string raises ``ValueError``."""
      # GH 13139
      expected_msg = "expr cannot be an empty string"
      with pytest.raises(ValueError, match=expected_msg):
          pd.eval("", engine=engine, parser=parser)
  def test_more_than_one_expression_raises(engine, parser):
      """Two ``;``-separated expressions in one string raise ``SyntaxError``."""
      expected_msg = "only a single expression is allowed"
      with pytest.raises(SyntaxError, match=expected_msg):
          pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser)
  @pytest.mark.parametrize("cmp", ("and", "or"))
  @pytest.mark.parametrize("lhs", (int, float))
  @pytest.mark.parametrize("rhs", (int, float))
  def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser):
      """``and`` / ``or`` between scalar operands raises ``NotImplementedError``.

      ``lhs`` and ``rhs`` arrive as the types ``int`` or ``float`` and are
      replaced by random scalars of that type. Three spellings of the boolean
      expression are checked.
      """
      # One shared generator for both branches, so successive draws differ
      # instead of every int draw coming from a freshly re-seeded generator.
      rng = np.random.default_rng(2)
      gen = {
          int: lambda: rng.integers(10),
          float: rng.standard_normal,
      }

      # ``lhs``, ``mid`` and ``rhs`` are referenced by name in the expressions.
      mid = gen[lhs]()  # noqa: F841
      lhs = gen[lhs]()
      rhs = gen[rhs]()

      ex1 = f"lhs {cmp} mid {cmp} rhs"
      ex2 = f"lhs {cmp} mid and mid {cmp} rhs"
      ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)"

      # Either message is accepted.
      msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
      for ex in (ex1, ex2, ex3):
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex, engine=engine, parser=parser)
  @pytest.mark.parametrize("other", ["'x'", "..."])
  def test_equals_various(other):
      """Comparing an object column with an unrelated value is all ``False``."""
      frame = DataFrame({"A": ["a", "b", "c"]}, dtype=object)
      result = frame.eval(f"A == {other}")

      # https://github.com/pandas-dev/pandas/issues/10239
      # lose name with numexpr engine. Remove when that's fixed.
      expected_name = None if USE_NUMEXPR else "A"
      expected = Series([False] * 3, name=expected_name)
      tm.assert_series_equal(result, expected)
  def test_inf(engine, parser):
      """``inf`` is usable in an expression and ``inf + 1`` equals ``np.inf``."""
      outcome = pd.eval("inf + 1", engine=engine, parser=parser)
      assert outcome == np.inf
  @pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"])
  def test_query_token(engine, column):
      """Backtick-quoted column names with non-ASCII symbols work in ``query``."""
      # See: https://github.com/pandas-dev/pandas/pull/42826
      values = np.random.default_rng(2).standard_normal((5, 2))
      frame = DataFrame(values, columns=[column, "b"])

      via_query = frame.query(f"`{column}` > 5", engine=engine)
      via_mask = frame[frame[column] > 5]
      tm.assert_frame_equal(via_query, via_mask)
  def test_negate_lt_eq_le(engine, parser):
      """``~(...)`` works with every parser; ``not (...)`` fails with "python"."""
      frame = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"])
      wanted = frame[~(frame["cat"] > 0)]

      tilde_result = frame.query("~(cat > 0)", engine=engine, parser=parser)
      tm.assert_frame_equal(tilde_result, wanted)

      if parser != "python":
          not_result = frame.query("not (cat > 0)", engine=engine, parser=parser)
          tm.assert_frame_equal(not_result, wanted)
          return

      with pytest.raises(NotImplementedError, match="'Not' nodes are not implemented"):
          frame.query("not (cat > 0)", engine=engine, parser=parser)
  @pytest.mark.parametrize(
      "column",
      DEFAULT_GLOBALS.keys(),
  )
  def test_eval_no_support_column_name(request, column):
      """A column named after a ``DEFAULT_GLOBALS`` key can be used in ``query``.

      Four names are marked as expected failures with ``KeyError``.
      """
      # GH 44603
      if column in ("True", "False", "inf", "Inf"):
          xfail_mark = pytest.mark.xfail(
              raises=KeyError,
              reason=f"GH 47859 DataFrame eval not supported with {column}",
          )
          request.applymarker(xfail_mark)

      values = np.random.default_rng(2).integers(0, 100, size=(10, 2))
      frame = DataFrame(values, columns=[column, "col1"])

      via_mask = frame[frame[column] > 6]
      via_query = frame.query(f"{column}>6")
      tm.assert_frame_equal(via_query, via_mask)
  def test_set_inplace(using_copy_on_write, warn_copy_on_write):
      """In-place ``eval`` assignment and its effect on earlier references.

      The frame itself must always show the new column values. A slice and a
      column taken beforehand show the new values without copy-on-write and
      the old values with it.
      """
      # https://github.com/pandas-dev/pandas/issues/47449
      # Ensure we don't only update the DataFrame inplace, but also the actual
      # column values, such that references to this column also get updated
      frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
      frame_view = frame[:]
      col_a = frame["A"]

      with tm.assert_cow_warning(warn_copy_on_write):
          frame.eval("A = B + C", inplace=True)

      updated = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]})
      tm.assert_frame_equal(frame, updated)

      # Values the references taken before the assignment are expected to hold.
      if using_copy_on_write:
          seen_by_refs = Series([1, 2, 3], name="A")
      else:
          seen_by_refs = updated["A"]
      tm.assert_series_equal(col_a, seen_by_refs)
      tm.assert_series_equal(frame_view["A"], seen_by_refs)
  class TestValidate:
      """Argument validation checks for ``pd.eval``."""

      @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
      def test_validate_bool_args(self, value):
          """A non-bool ``inplace`` argument raises ``ValueError``."""
          expected_msg = 'For argument "inplace" expected type bool, received type'
          with pytest.raises(ValueError, match=expected_msg):
              pd.eval("2+2", inplace=value)