test_pivot.py 92 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726
  1. from datetime import (
  2. date,
  3. datetime,
  4. timedelta,
  5. )
  6. from itertools import product
  7. import re
  8. import numpy as np
  9. import pytest
  10. from pandas._config import using_string_dtype
  11. from pandas.errors import PerformanceWarning
  12. import pandas as pd
  13. from pandas import (
  14. Categorical,
  15. DataFrame,
  16. Grouper,
  17. Index,
  18. MultiIndex,
  19. Series,
  20. concat,
  21. date_range,
  22. )
  23. import pandas._testing as tm
  24. from pandas.api.types import CategoricalDtype
  25. from pandas.core.reshape import reshape as reshape_lib
  26. from pandas.core.reshape.pivot import pivot_table
  27. @pytest.fixture(params=[True, False])
  28. def dropna(request):
  29. return request.param
  30. @pytest.fixture(params=[([0] * 4, [1] * 4), (range(3), range(1, 4))])
  31. def interval_values(request, closed):
  32. left, right = request.param
  33. return Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
  34. class TestPivotTable:
  35. @pytest.fixture
  36. def data(self):
  37. return DataFrame(
  38. {
  39. "A": [
  40. "foo",
  41. "foo",
  42. "foo",
  43. "foo",
  44. "bar",
  45. "bar",
  46. "bar",
  47. "bar",
  48. "foo",
  49. "foo",
  50. "foo",
  51. ],
  52. "B": [
  53. "one",
  54. "one",
  55. "one",
  56. "two",
  57. "one",
  58. "one",
  59. "one",
  60. "two",
  61. "two",
  62. "two",
  63. "one",
  64. ],
  65. "C": [
  66. "dull",
  67. "dull",
  68. "shiny",
  69. "dull",
  70. "dull",
  71. "shiny",
  72. "shiny",
  73. "dull",
  74. "shiny",
  75. "shiny",
  76. "shiny",
  77. ],
  78. "D": np.random.default_rng(2).standard_normal(11),
  79. "E": np.random.default_rng(2).standard_normal(11),
  80. "F": np.random.default_rng(2).standard_normal(11),
  81. }
  82. )
  83. def test_pivot_table(self, observed, data):
  84. index = ["A", "B"]
  85. columns = "C"
  86. table = pivot_table(
  87. data, values="D", index=index, columns=columns, observed=observed
  88. )
  89. table2 = data.pivot_table(
  90. values="D", index=index, columns=columns, observed=observed
  91. )
  92. tm.assert_frame_equal(table, table2)
  93. # this works
  94. pivot_table(data, values="D", index=index, observed=observed)
  95. if len(index) > 1:
  96. assert table.index.names == tuple(index)
  97. else:
  98. assert table.index.name == index[0]
  99. if len(columns) > 1:
  100. assert table.columns.names == columns
  101. else:
  102. assert table.columns.name == columns[0]
  103. expected = data.groupby(index + [columns])["D"].agg("mean").unstack()
  104. tm.assert_frame_equal(table, expected)
  105. def test_pivot_table_categorical_observed_equal(self, observed):
  106. # issue #24923
  107. df = DataFrame(
  108. {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]}
  109. )
  110. expected = df.pivot_table(
  111. index="col1", values="col3", columns="col2", aggfunc="sum", fill_value=0
  112. )
  113. expected.index = expected.index.astype("category")
  114. expected.columns = expected.columns.astype("category")
  115. df.col1 = df.col1.astype("category")
  116. df.col2 = df.col2.astype("category")
  117. result = df.pivot_table(
  118. index="col1",
  119. values="col3",
  120. columns="col2",
  121. aggfunc="sum",
  122. fill_value=0,
  123. observed=observed,
  124. )
  125. tm.assert_frame_equal(result, expected)
  126. def test_pivot_table_nocols(self):
  127. df = DataFrame(
  128. {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
  129. )
  130. rs = df.pivot_table(columns="cols", aggfunc="sum")
  131. xp = df.pivot_table(index="cols", aggfunc="sum").T
  132. tm.assert_frame_equal(rs, xp)
  133. rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"})
  134. xp = df.pivot_table(index="cols", aggfunc={"values": "mean"}).T
  135. tm.assert_frame_equal(rs, xp)
  136. def test_pivot_table_dropna(self):
  137. df = DataFrame(
  138. {
  139. "amount": {0: 60000, 1: 100000, 2: 50000, 3: 30000},
  140. "customer": {0: "A", 1: "A", 2: "B", 3: "C"},
  141. "month": {0: 201307, 1: 201309, 2: 201308, 3: 201310},
  142. "product": {0: "a", 1: "b", 2: "c", 3: "d"},
  143. "quantity": {0: 2000000, 1: 500000, 2: 1000000, 3: 1000000},
  144. }
  145. )
  146. pv_col = df.pivot_table(
  147. "quantity", "month", ["customer", "product"], dropna=False
  148. )
  149. pv_ind = df.pivot_table(
  150. "quantity", ["customer", "product"], "month", dropna=False
  151. )
  152. m = MultiIndex.from_tuples(
  153. [
  154. ("A", "a"),
  155. ("A", "b"),
  156. ("A", "c"),
  157. ("A", "d"),
  158. ("B", "a"),
  159. ("B", "b"),
  160. ("B", "c"),
  161. ("B", "d"),
  162. ("C", "a"),
  163. ("C", "b"),
  164. ("C", "c"),
  165. ("C", "d"),
  166. ],
  167. names=["customer", "product"],
  168. )
  169. tm.assert_index_equal(pv_col.columns, m)
  170. tm.assert_index_equal(pv_ind.index, m)
  171. def test_pivot_table_categorical(self):
  172. cat1 = Categorical(
  173. ["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True
  174. )
  175. cat2 = Categorical(
  176. ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
  177. )
  178. df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
  179. msg = "The default value of observed=False is deprecated"
  180. with tm.assert_produces_warning(FutureWarning, match=msg):
  181. result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
  182. exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
  183. expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
  184. tm.assert_frame_equal(result, expected)
  185. def test_pivot_table_dropna_categoricals(self, dropna):
  186. # GH 15193
  187. categories = ["a", "b", "c", "d"]
  188. df = DataFrame(
  189. {
  190. "A": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
  191. "B": [1, 2, 3, 1, 2, 3, 1, 2, 3],
  192. "C": range(9),
  193. }
  194. )
  195. df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
  196. msg = "The default value of observed=False is deprecated"
  197. with tm.assert_produces_warning(FutureWarning, match=msg):
  198. result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
  199. expected_columns = Series(["a", "b", "c"], name="A")
  200. expected_columns = expected_columns.astype(
  201. CategoricalDtype(categories, ordered=False)
  202. )
  203. expected_index = Series([1, 2, 3], name="B")
  204. expected = DataFrame(
  205. [[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]],
  206. index=expected_index,
  207. columns=expected_columns,
  208. )
  209. if not dropna:
  210. # add back the non observed to compare
  211. expected = expected.reindex(columns=Categorical(categories)).astype("float")
  212. tm.assert_frame_equal(result, expected)
  213. def test_pivot_with_non_observable_dropna(self, dropna):
  214. # gh-21133
  215. df = DataFrame(
  216. {
  217. "A": Categorical(
  218. [np.nan, "low", "high", "low", "high"],
  219. categories=["low", "high"],
  220. ordered=True,
  221. ),
  222. "B": [0.0, 1.0, 2.0, 3.0, 4.0],
  223. }
  224. )
  225. msg = "The default value of observed=False is deprecated"
  226. with tm.assert_produces_warning(FutureWarning, match=msg):
  227. result = df.pivot_table(index="A", values="B", dropna=dropna)
  228. if dropna:
  229. values = [2.0, 3.0]
  230. codes = [0, 1]
  231. else:
  232. # GH: 10772
  233. values = [2.0, 3.0, 0.0]
  234. codes = [0, 1, -1]
  235. expected = DataFrame(
  236. {"B": values},
  237. index=Index(
  238. Categorical.from_codes(
  239. codes, categories=["low", "high"], ordered=dropna
  240. ),
  241. name="A",
  242. ),
  243. )
  244. tm.assert_frame_equal(result, expected)
  245. def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
  246. # gh-21378
  247. df = DataFrame(
  248. {
  249. "A": Categorical(
  250. ["left", "low", "high", "low", "high"],
  251. categories=["low", "high", "left"],
  252. ordered=True,
  253. ),
  254. "B": range(5),
  255. }
  256. )
  257. msg = "The default value of observed=False is deprecated"
  258. with tm.assert_produces_warning(FutureWarning, match=msg):
  259. result = df.pivot_table(index="A", values="B", dropna=dropna)
  260. expected = DataFrame(
  261. {"B": [2.0, 3.0, 0.0]},
  262. index=Index(
  263. Categorical.from_codes(
  264. [0, 1, 2], categories=["low", "high", "left"], ordered=True
  265. ),
  266. name="A",
  267. ),
  268. )
  269. if not dropna:
  270. expected["B"] = expected["B"].astype(float)
  271. tm.assert_frame_equal(result, expected)
  272. def test_pivot_with_interval_index(self, interval_values, dropna):
  273. # GH 25814
  274. df = DataFrame({"A": interval_values, "B": 1})
  275. msg = "The default value of observed=False is deprecated"
  276. with tm.assert_produces_warning(FutureWarning, match=msg):
  277. result = df.pivot_table(index="A", values="B", dropna=dropna)
  278. expected = DataFrame(
  279. {"B": 1.0}, index=Index(interval_values.unique(), name="A")
  280. )
  281. if not dropna:
  282. expected = expected.astype(float)
  283. tm.assert_frame_equal(result, expected)
  284. def test_pivot_with_interval_index_margins(self):
  285. # GH 25815
  286. ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2])
  287. df = DataFrame(
  288. {
  289. "A": np.arange(4, 0, -1, dtype=np.intp),
  290. "B": ["a", "b", "a", "b"],
  291. "C": Categorical(ordered_cat, ordered=True).sort_values(
  292. ascending=False
  293. ),
  294. }
  295. )
  296. msg = "The default value of observed=False is deprecated"
  297. with tm.assert_produces_warning(FutureWarning, match=msg):
  298. pivot_tab = pivot_table(
  299. df, index="C", columns="B", values="A", aggfunc="sum", margins=True
  300. )
  301. result = pivot_tab["All"]
  302. expected = Series(
  303. [3, 7, 10],
  304. index=Index([pd.Interval(0, 1), pd.Interval(1, 2), "All"], name="C"),
  305. name="All",
  306. dtype=np.intp,
  307. )
  308. tm.assert_series_equal(result, expected)
  309. def test_pass_array(self, data):
  310. result = data.pivot_table("D", index=data.A, columns=data.C)
  311. expected = data.pivot_table("D", index="A", columns="C")
  312. tm.assert_frame_equal(result, expected)
  313. def test_pass_function(self, data):
  314. result = data.pivot_table("D", index=lambda x: x // 5, columns=data.C)
  315. expected = data.pivot_table("D", index=data.index // 5, columns="C")
  316. tm.assert_frame_equal(result, expected)
  317. def test_pivot_table_multiple(self, data):
  318. index = ["A", "B"]
  319. columns = "C"
  320. table = pivot_table(data, index=index, columns=columns)
  321. expected = data.groupby(index + [columns]).agg("mean").unstack()
  322. tm.assert_frame_equal(table, expected)
  323. def test_pivot_dtypes(self):
  324. # can convert dtypes
  325. f = DataFrame(
  326. {
  327. "a": ["cat", "bat", "cat", "bat"],
  328. "v": [1, 2, 3, 4],
  329. "i": ["a", "b", "a", "b"],
  330. }
  331. )
  332. assert f.dtypes["v"] == "int64"
  333. z = pivot_table(
  334. f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc="sum"
  335. )
  336. result = z.dtypes
  337. expected = Series([np.dtype("int64")] * 2, index=Index(list("ab"), name="i"))
  338. tm.assert_series_equal(result, expected)
  339. # cannot convert dtypes
  340. f = DataFrame(
  341. {
  342. "a": ["cat", "bat", "cat", "bat"],
  343. "v": [1.5, 2.5, 3.5, 4.5],
  344. "i": ["a", "b", "a", "b"],
  345. }
  346. )
  347. assert f.dtypes["v"] == "float64"
  348. z = pivot_table(
  349. f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc="mean"
  350. )
  351. result = z.dtypes
  352. expected = Series([np.dtype("float64")] * 2, index=Index(list("ab"), name="i"))
  353. tm.assert_series_equal(result, expected)
  354. @pytest.mark.parametrize(
  355. "columns,values",
  356. [
  357. ("bool1", ["float1", "float2"]),
  358. ("bool1", ["float1", "float2", "bool1"]),
  359. ("bool2", ["float1", "float2", "bool1"]),
  360. ],
  361. )
  362. def test_pivot_preserve_dtypes(self, columns, values):
  363. # GH 7142 regression test
  364. v = np.arange(5, dtype=np.float64)
  365. df = DataFrame(
  366. {"float1": v, "float2": v + 2.0, "bool1": v <= 2, "bool2": v <= 3}
  367. )
  368. df_res = df.reset_index().pivot_table(
  369. index="index", columns=columns, values=values
  370. )
  371. result = dict(df_res.dtypes)
  372. expected = {col: np.dtype("float64") for col in df_res}
  373. assert result == expected
  374. def test_pivot_no_values(self):
  375. # GH 14380
  376. idx = pd.DatetimeIndex(
  377. ["2011-01-01", "2011-02-01", "2011-01-02", "2011-01-01", "2011-01-02"]
  378. )
  379. df = DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx)
  380. res = df.pivot_table(index=df.index.month, columns=df.index.day)
  381. exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)])
  382. exp_columns = exp_columns.set_levels(
  383. exp_columns.levels[1].astype(np.int32), level=1
  384. )
  385. exp = DataFrame(
  386. [[2.5, 4.0], [2.0, np.nan]],
  387. index=Index([1, 2], dtype=np.int32),
  388. columns=exp_columns,
  389. )
  390. tm.assert_frame_equal(res, exp)
  391. df = DataFrame(
  392. {
  393. "A": [1, 2, 3, 4, 5],
  394. "dt": date_range("2011-01-01", freq="D", periods=5),
  395. },
  396. index=idx,
  397. )
  398. res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="ME"))
  399. exp_columns = MultiIndex.from_arrays(
  400. [["A"], pd.DatetimeIndex(["2011-01-31"], dtype="M8[ns]")],
  401. names=[None, "dt"],
  402. )
  403. exp = DataFrame(
  404. [3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns
  405. )
  406. tm.assert_frame_equal(res, exp)
  407. res = df.pivot_table(
  408. index=Grouper(freq="YE"), columns=Grouper(key="dt", freq="ME")
  409. )
  410. exp = DataFrame(
  411. [3.0],
  412. index=pd.DatetimeIndex(["2011-12-31"], freq="YE"),
  413. columns=exp_columns,
  414. )
  415. tm.assert_frame_equal(res, exp)
  416. def test_pivot_multi_values(self, data):
  417. result = pivot_table(
  418. data, values=["D", "E"], index="A", columns=["B", "C"], fill_value=0
  419. )
  420. expected = pivot_table(
  421. data.drop(["F"], axis=1), index="A", columns=["B", "C"], fill_value=0
  422. )
  423. tm.assert_frame_equal(result, expected)
  424. def test_pivot_multi_functions(self, data):
  425. f = lambda func: pivot_table(
  426. data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func
  427. )
  428. result = f(["mean", "std"])
  429. means = f("mean")
  430. stds = f("std")
  431. expected = concat([means, stds], keys=["mean", "std"], axis=1)
  432. tm.assert_frame_equal(result, expected)
  433. # margins not supported??
  434. f = lambda func: pivot_table(
  435. data,
  436. values=["D", "E"],
  437. index=["A", "B"],
  438. columns="C",
  439. aggfunc=func,
  440. margins=True,
  441. )
  442. result = f(["mean", "std"])
  443. means = f("mean")
  444. stds = f("std")
  445. expected = concat([means, stds], keys=["mean", "std"], axis=1)
  446. tm.assert_frame_equal(result, expected)
  447. @pytest.mark.parametrize("method", [True, False])
  448. def test_pivot_index_with_nan(self, method):
  449. # GH 3588
  450. nan = np.nan
  451. df = DataFrame(
  452. {
  453. "a": ["R1", "R2", nan, "R4"],
  454. "b": ["C1", "C2", "C3", "C4"],
  455. "c": [10, 15, 17, 20],
  456. }
  457. )
  458. if method:
  459. result = df.pivot(index="a", columns="b", values="c")
  460. else:
  461. result = pd.pivot(df, index="a", columns="b", values="c")
  462. expected = DataFrame(
  463. [
  464. [nan, nan, 17, nan],
  465. [10, nan, nan, nan],
  466. [nan, 15, nan, nan],
  467. [nan, nan, nan, 20],
  468. ],
  469. index=Index([nan, "R1", "R2", "R4"], name="a"),
  470. columns=Index(["C1", "C2", "C3", "C4"], name="b"),
  471. )
  472. tm.assert_frame_equal(result, expected)
  473. tm.assert_frame_equal(df.pivot(index="b", columns="a", values="c"), expected.T)
  474. @pytest.mark.parametrize("method", [True, False])
  475. def test_pivot_index_with_nan_dates(self, method):
  476. # GH9491
  477. df = DataFrame(
  478. {
  479. "a": date_range("2014-02-01", periods=6, freq="D"),
  480. "c": 100 + np.arange(6),
  481. }
  482. )
  483. df["b"] = df["a"] - pd.Timestamp("2014-02-02")
  484. df.loc[1, "a"] = df.loc[3, "a"] = np.nan
  485. df.loc[1, "b"] = df.loc[4, "b"] = np.nan
  486. if method:
  487. pv = df.pivot(index="a", columns="b", values="c")
  488. else:
  489. pv = pd.pivot(df, index="a", columns="b", values="c")
  490. assert pv.notna().values.sum() == len(df)
  491. for _, row in df.iterrows():
  492. assert pv.loc[row["a"], row["b"]] == row["c"]
  493. if method:
  494. result = df.pivot(index="b", columns="a", values="c")
  495. else:
  496. result = pd.pivot(df, index="b", columns="a", values="c")
  497. tm.assert_frame_equal(result, pv.T)
  498. @pytest.mark.parametrize("method", [True, False])
  499. def test_pivot_with_tz(self, method, unit):
  500. # GH 5878
  501. df = DataFrame(
  502. {
  503. "dt1": pd.DatetimeIndex(
  504. [
  505. datetime(2013, 1, 1, 9, 0),
  506. datetime(2013, 1, 2, 9, 0),
  507. datetime(2013, 1, 1, 9, 0),
  508. datetime(2013, 1, 2, 9, 0),
  509. ],
  510. dtype=f"M8[{unit}, US/Pacific]",
  511. ),
  512. "dt2": pd.DatetimeIndex(
  513. [
  514. datetime(2014, 1, 1, 9, 0),
  515. datetime(2014, 1, 1, 9, 0),
  516. datetime(2014, 1, 2, 9, 0),
  517. datetime(2014, 1, 2, 9, 0),
  518. ],
  519. dtype=f"M8[{unit}, Asia/Tokyo]",
  520. ),
  521. "data1": np.arange(4, dtype="int64"),
  522. "data2": np.arange(4, dtype="int64"),
  523. }
  524. )
  525. exp_col1 = Index(["data1", "data1", "data2", "data2"])
  526. exp_col2 = pd.DatetimeIndex(
  527. ["2014/01/01 09:00", "2014/01/02 09:00"] * 2,
  528. name="dt2",
  529. dtype=f"M8[{unit}, Asia/Tokyo]",
  530. )
  531. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  532. exp_idx = pd.DatetimeIndex(
  533. ["2013/01/01 09:00", "2013/01/02 09:00"],
  534. name="dt1",
  535. dtype=f"M8[{unit}, US/Pacific]",
  536. )
  537. expected = DataFrame(
  538. [[0, 2, 0, 2], [1, 3, 1, 3]],
  539. index=exp_idx,
  540. columns=exp_col,
  541. )
  542. if method:
  543. pv = df.pivot(index="dt1", columns="dt2")
  544. else:
  545. pv = pd.pivot(df, index="dt1", columns="dt2")
  546. tm.assert_frame_equal(pv, expected)
  547. expected = DataFrame(
  548. [[0, 2], [1, 3]],
  549. index=exp_idx,
  550. columns=exp_col2[:2],
  551. )
  552. if method:
  553. pv = df.pivot(index="dt1", columns="dt2", values="data1")
  554. else:
  555. pv = pd.pivot(df, index="dt1", columns="dt2", values="data1")
  556. tm.assert_frame_equal(pv, expected)
  557. def test_pivot_tz_in_values(self):
  558. # GH 14948
  559. df = DataFrame(
  560. [
  561. {
  562. "uid": "aa",
  563. "ts": pd.Timestamp("2016-08-12 13:00:00-0700", tz="US/Pacific"),
  564. },
  565. {
  566. "uid": "aa",
  567. "ts": pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"),
  568. },
  569. {
  570. "uid": "aa",
  571. "ts": pd.Timestamp("2016-08-12 14:00:00-0700", tz="US/Pacific"),
  572. },
  573. {
  574. "uid": "aa",
  575. "ts": pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"),
  576. },
  577. {
  578. "uid": "aa",
  579. "ts": pd.Timestamp("2016-08-25 13:00:00-0700", tz="US/Pacific"),
  580. },
  581. ]
  582. )
  583. df = df.set_index("ts").reset_index()
  584. mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, second=0, microsecond=0))
  585. result = pivot_table(
  586. df.set_index("ts").reset_index(),
  587. values="ts",
  588. index=["uid"],
  589. columns=[mins],
  590. aggfunc="min",
  591. )
  592. expected = DataFrame(
  593. [
  594. [
  595. pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"),
  596. pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"),
  597. ]
  598. ],
  599. index=Index(["aa"], name="uid"),
  600. columns=pd.DatetimeIndex(
  601. [
  602. pd.Timestamp("2016-08-12 00:00:00", tz="US/Pacific"),
  603. pd.Timestamp("2016-08-25 00:00:00", tz="US/Pacific"),
  604. ],
  605. name="ts",
  606. ),
  607. )
  608. tm.assert_frame_equal(result, expected)
  609. @pytest.mark.parametrize("method", [True, False])
  610. def test_pivot_periods(self, method):
  611. df = DataFrame(
  612. {
  613. "p1": [
  614. pd.Period("2013-01-01", "D"),
  615. pd.Period("2013-01-02", "D"),
  616. pd.Period("2013-01-01", "D"),
  617. pd.Period("2013-01-02", "D"),
  618. ],
  619. "p2": [
  620. pd.Period("2013-01", "M"),
  621. pd.Period("2013-01", "M"),
  622. pd.Period("2013-02", "M"),
  623. pd.Period("2013-02", "M"),
  624. ],
  625. "data1": np.arange(4, dtype="int64"),
  626. "data2": np.arange(4, dtype="int64"),
  627. }
  628. )
  629. exp_col1 = Index(["data1", "data1", "data2", "data2"])
  630. exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M")
  631. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  632. expected = DataFrame(
  633. [[0, 2, 0, 2], [1, 3, 1, 3]],
  634. index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
  635. columns=exp_col,
  636. )
  637. if method:
  638. pv = df.pivot(index="p1", columns="p2")
  639. else:
  640. pv = pd.pivot(df, index="p1", columns="p2")
  641. tm.assert_frame_equal(pv, expected)
  642. expected = DataFrame(
  643. [[0, 2], [1, 3]],
  644. index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
  645. columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"),
  646. )
  647. if method:
  648. pv = df.pivot(index="p1", columns="p2", values="data1")
  649. else:
  650. pv = pd.pivot(df, index="p1", columns="p2", values="data1")
  651. tm.assert_frame_equal(pv, expected)
  652. def test_pivot_periods_with_margins(self):
  653. # GH 28323
  654. df = DataFrame(
  655. {
  656. "a": [1, 1, 2, 2],
  657. "b": [
  658. pd.Period("2019Q1"),
  659. pd.Period("2019Q2"),
  660. pd.Period("2019Q1"),
  661. pd.Period("2019Q2"),
  662. ],
  663. "x": 1.0,
  664. }
  665. )
  666. expected = DataFrame(
  667. data=1.0,
  668. index=Index([1, 2, "All"], name="a"),
  669. columns=Index([pd.Period("2019Q1"), pd.Period("2019Q2"), "All"], name="b"),
  670. )
  671. result = df.pivot_table(index="a", columns="b", values="x", margins=True)
  672. tm.assert_frame_equal(expected, result)
  673. @pytest.mark.parametrize(
  674. "values",
  675. [
  676. ["baz", "zoo"],
  677. np.array(["baz", "zoo"]),
  678. Series(["baz", "zoo"]),
  679. Index(["baz", "zoo"]),
  680. ],
  681. )
  682. @pytest.mark.parametrize("method", [True, False])
  683. def test_pivot_with_list_like_values(self, values, method):
  684. # issue #17160
  685. df = DataFrame(
  686. {
  687. "foo": ["one", "one", "one", "two", "two", "two"],
  688. "bar": ["A", "B", "C", "A", "B", "C"],
  689. "baz": [1, 2, 3, 4, 5, 6],
  690. "zoo": ["x", "y", "z", "q", "w", "t"],
  691. }
  692. )
  693. if method:
  694. result = df.pivot(index="foo", columns="bar", values=values)
  695. else:
  696. result = pd.pivot(df, index="foo", columns="bar", values=values)
  697. data = [[1, 2, 3, "x", "y", "z"], [4, 5, 6, "q", "w", "t"]]
  698. index = Index(data=["one", "two"], name="foo")
  699. columns = MultiIndex(
  700. levels=[["baz", "zoo"], ["A", "B", "C"]],
  701. codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
  702. names=[None, "bar"],
  703. )
  704. expected = DataFrame(data=data, index=index, columns=columns)
  705. expected["baz"] = expected["baz"].astype(object)
  706. tm.assert_frame_equal(result, expected)
  707. @pytest.mark.parametrize(
  708. "values",
  709. [
  710. ["bar", "baz"],
  711. np.array(["bar", "baz"]),
  712. Series(["bar", "baz"]),
  713. Index(["bar", "baz"]),
  714. ],
  715. )
  716. @pytest.mark.parametrize("method", [True, False])
  717. def test_pivot_with_list_like_values_nans(self, values, method):
  718. # issue #17160
  719. df = DataFrame(
  720. {
  721. "foo": ["one", "one", "one", "two", "two", "two"],
  722. "bar": ["A", "B", "C", "A", "B", "C"],
  723. "baz": [1, 2, 3, 4, 5, 6],
  724. "zoo": ["x", "y", "z", "q", "w", "t"],
  725. }
  726. )
  727. if method:
  728. result = df.pivot(index="zoo", columns="foo", values=values)
  729. else:
  730. result = pd.pivot(df, index="zoo", columns="foo", values=values)
  731. data = [
  732. [np.nan, "A", np.nan, 4],
  733. [np.nan, "C", np.nan, 6],
  734. [np.nan, "B", np.nan, 5],
  735. ["A", np.nan, 1, np.nan],
  736. ["B", np.nan, 2, np.nan],
  737. ["C", np.nan, 3, np.nan],
  738. ]
  739. index = Index(data=["q", "t", "w", "x", "y", "z"], name="zoo")
  740. columns = MultiIndex(
  741. levels=[["bar", "baz"], ["one", "two"]],
  742. codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
  743. names=[None, "foo"],
  744. )
  745. expected = DataFrame(data=data, index=index, columns=columns)
  746. expected["baz"] = expected["baz"].astype(object)
  747. tm.assert_frame_equal(result, expected)
  748. def test_pivot_columns_none_raise_error(self):
  749. # GH 30924
  750. df = DataFrame({"col1": ["a", "b", "c"], "col2": [1, 2, 3], "col3": [1, 2, 3]})
  751. msg = r"pivot\(\) missing 1 required keyword-only argument: 'columns'"
  752. with pytest.raises(TypeError, match=msg):
  753. df.pivot(index="col1", values="col3") # pylint: disable=missing-kwoa
  754. @pytest.mark.xfail(
  755. reason="MultiIndexed unstack with tuple names fails with KeyError GH#19966"
  756. )
  757. @pytest.mark.parametrize("method", [True, False])
  758. def test_pivot_with_multiindex(self, method):
  759. # issue #17160
  760. index = Index(data=[0, 1, 2, 3, 4, 5])
  761. data = [
  762. ["one", "A", 1, "x"],
  763. ["one", "B", 2, "y"],
  764. ["one", "C", 3, "z"],
  765. ["two", "A", 4, "q"],
  766. ["two", "B", 5, "w"],
  767. ["two", "C", 6, "t"],
  768. ]
  769. columns = MultiIndex(
  770. levels=[["bar", "baz"], ["first", "second"]],
  771. codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
  772. )
  773. df = DataFrame(data=data, index=index, columns=columns, dtype="object")
  774. if method:
  775. result = df.pivot(
  776. index=("bar", "first"),
  777. columns=("bar", "second"),
  778. values=("baz", "first"),
  779. )
  780. else:
  781. result = pd.pivot(
  782. df,
  783. index=("bar", "first"),
  784. columns=("bar", "second"),
  785. values=("baz", "first"),
  786. )
  787. data = {
  788. "A": Series([1, 4], index=["one", "two"]),
  789. "B": Series([2, 5], index=["one", "two"]),
  790. "C": Series([3, 6], index=["one", "two"]),
  791. }
  792. expected = DataFrame(data)
  793. tm.assert_frame_equal(result, expected)
  794. @pytest.mark.parametrize("method", [True, False])
  795. def test_pivot_with_tuple_of_values(self, method):
  796. # issue #17160
  797. df = DataFrame(
  798. {
  799. "foo": ["one", "one", "one", "two", "two", "two"],
  800. "bar": ["A", "B", "C", "A", "B", "C"],
  801. "baz": [1, 2, 3, 4, 5, 6],
  802. "zoo": ["x", "y", "z", "q", "w", "t"],
  803. }
  804. )
  805. with pytest.raises(KeyError, match=r"^\('bar', 'baz'\)$"):
  806. # tuple is seen as a single column name
  807. if method:
  808. df.pivot(index="zoo", columns="foo", values=("bar", "baz"))
  809. else:
  810. pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz"))
  811. def _check_output(
  812. self,
  813. result,
  814. values_col,
  815. data,
  816. index=["A", "B"],
  817. columns=["C"],
  818. margins_col="All",
  819. ):
  820. col_margins = result.loc[result.index[:-1], margins_col]
  821. expected_col_margins = data.groupby(index)[values_col].mean()
  822. tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
  823. assert col_margins.name == margins_col
  824. result = result.sort_index()
  825. index_margins = result.loc[(margins_col, "")].iloc[:-1]
  826. expected_ix_margins = data.groupby(columns)[values_col].mean()
  827. tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False)
  828. assert index_margins.name == (margins_col, "")
  829. grand_total_margins = result.loc[(margins_col, ""), margins_col]
  830. expected_total_margins = data[values_col].mean()
  831. assert grand_total_margins == expected_total_margins
  832. def test_margins(self, data):
  833. # column specified
  834. result = data.pivot_table(
  835. values="D", index=["A", "B"], columns="C", margins=True, aggfunc="mean"
  836. )
  837. self._check_output(result, "D", data)
  838. # Set a different margins_name (not 'All')
  839. result = data.pivot_table(
  840. values="D",
  841. index=["A", "B"],
  842. columns="C",
  843. margins=True,
  844. aggfunc="mean",
  845. margins_name="Totals",
  846. )
  847. self._check_output(result, "D", data, margins_col="Totals")
  848. # no column specified
  849. table = data.pivot_table(
  850. index=["A", "B"], columns="C", margins=True, aggfunc="mean"
  851. )
  852. for value_col in table.columns.levels[0]:
  853. self._check_output(table[value_col], value_col, data)
  854. def test_no_col(self, data, using_infer_string):
  855. # no col
  856. # to help with a buglet
  857. data.columns = [k * 2 for k in data.columns]
  858. msg = re.escape("agg function failed [how->mean,dtype->")
  859. if using_infer_string:
  860. msg = "dtype 'str' does not support operation 'mean'"
  861. with pytest.raises(TypeError, match=msg):
  862. data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
  863. table = data.drop(columns="CC").pivot_table(
  864. index=["AA", "BB"], margins=True, aggfunc="mean"
  865. )
  866. for value_col in table.columns:
  867. totals = table.loc[("All", ""), value_col]
  868. assert totals == data[value_col].mean()
  869. with pytest.raises(TypeError, match=msg):
  870. data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
  871. table = data.drop(columns="CC").pivot_table(
  872. index=["AA", "BB"], margins=True, aggfunc="mean"
  873. )
  874. for item in ["DD", "EE", "FF"]:
  875. totals = table.loc[("All", ""), item]
  876. assert totals == data[item].mean()
  877. @pytest.mark.parametrize(
  878. "columns, aggfunc, values, expected_columns",
  879. [
  880. (
  881. "A",
  882. "mean",
  883. [[5.5, 5.5, 2.2, 2.2], [8.0, 8.0, 4.4, 4.4]],
  884. Index(["bar", "All", "foo", "All"], name="A"),
  885. ),
  886. (
  887. ["A", "B"],
  888. "sum",
  889. [
  890. [9, 13, 22, 5, 6, 11],
  891. [14, 18, 32, 11, 11, 22],
  892. ],
  893. MultiIndex.from_tuples(
  894. [
  895. ("bar", "one"),
  896. ("bar", "two"),
  897. ("bar", "All"),
  898. ("foo", "one"),
  899. ("foo", "two"),
  900. ("foo", "All"),
  901. ],
  902. names=["A", "B"],
  903. ),
  904. ),
  905. ],
  906. )
  907. def test_margin_with_only_columns_defined(
  908. self, columns, aggfunc, values, expected_columns, using_infer_string
  909. ):
  910. # GH 31016
  911. df = DataFrame(
  912. {
  913. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  914. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  915. "C": [
  916. "small",
  917. "large",
  918. "large",
  919. "small",
  920. "small",
  921. "large",
  922. "small",
  923. "small",
  924. "large",
  925. ],
  926. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  927. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  928. }
  929. )
  930. if aggfunc != "sum":
  931. msg = re.escape("agg function failed [how->mean,dtype->")
  932. if using_infer_string:
  933. msg = "dtype 'str' does not support operation 'mean'"
  934. with pytest.raises(TypeError, match=msg):
  935. df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
  936. if "B" not in columns:
  937. df = df.drop(columns="B")
  938. result = df.drop(columns="C").pivot_table(
  939. columns=columns, margins=True, aggfunc=aggfunc
  940. )
  941. expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)
  942. tm.assert_frame_equal(result, expected)
  943. def test_margins_dtype(self, data):
  944. # GH 17013
  945. df = data.copy()
  946. df[["D", "E", "F"]] = np.arange(len(df) * 3).reshape(len(df), 3).astype("i8")
  947. mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")]
  948. mi = MultiIndex.from_tuples(mi_val, names=("A", "B"))
  949. expected = DataFrame(
  950. {"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]}, index=mi
  951. ).rename_axis("C", axis=1)
  952. expected["All"] = expected["dull"] + expected["shiny"]
  953. result = df.pivot_table(
  954. values="D",
  955. index=["A", "B"],
  956. columns="C",
  957. margins=True,
  958. aggfunc="sum",
  959. fill_value=0,
  960. )
  961. tm.assert_frame_equal(expected, result)
  962. def test_margins_dtype_len(self, data):
  963. mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")]
  964. mi = MultiIndex.from_tuples(mi_val, names=("A", "B"))
  965. expected = DataFrame(
  966. {"dull": [1, 1, 2, 1, 5], "shiny": [2, 0, 2, 2, 6]}, index=mi
  967. ).rename_axis("C", axis=1)
  968. expected["All"] = expected["dull"] + expected["shiny"]
  969. result = data.pivot_table(
  970. values="D",
  971. index=["A", "B"],
  972. columns="C",
  973. margins=True,
  974. aggfunc=len,
  975. fill_value=0,
  976. )
  977. tm.assert_frame_equal(expected, result)
  978. @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
  979. def test_pivot_table_multiindex_only(self, cols):
  980. # GH 17038
  981. df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]})
  982. result = df2.pivot_table(values="v", columns=cols)
  983. expected = DataFrame(
  984. [[4.0, 5.0, 6.0]],
  985. columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
  986. index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
  987. )
  988. tm.assert_frame_equal(result, expected)
  989. def test_pivot_table_retains_tz(self):
  990. dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam")
  991. df = DataFrame(
  992. {
  993. "A": np.random.default_rng(2).standard_normal(3),
  994. "B": np.random.default_rng(2).standard_normal(3),
  995. "C": dti,
  996. }
  997. )
  998. result = df.pivot_table(index=["B", "C"], dropna=False)
  999. # check tz retention
  1000. assert result.index.levels[1].equals(dti)
  1001. def test_pivot_integer_columns(self):
  1002. # caused by upstream bug in unstack
  1003. d = date.min
  1004. data = list(
  1005. product(
  1006. ["foo", "bar"],
  1007. ["A", "B", "C"],
  1008. ["x1", "x2"],
  1009. [d + timedelta(i) for i in range(20)],
  1010. [1.0],
  1011. )
  1012. )
  1013. df = DataFrame(data)
  1014. table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])
  1015. df2 = df.rename(columns=str)
  1016. table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"])
  1017. tm.assert_frame_equal(table, table2, check_names=False)
  1018. def test_pivot_no_level_overlap(self):
  1019. # GH #1181
  1020. data = DataFrame(
  1021. {
  1022. "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2,
  1023. "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2,
  1024. "c": (["foo"] * 4 + ["bar"] * 4) * 2,
  1025. "value": np.random.default_rng(2).standard_normal(16),
  1026. }
  1027. )
  1028. table = data.pivot_table("value", index="a", columns=["b", "c"])
  1029. grouped = data.groupby(["a", "b", "c"])["value"].mean()
  1030. expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all")
  1031. tm.assert_frame_equal(table, expected)
  1032. def test_pivot_columns_lexsorted(self):
  1033. n = 10000
  1034. dtype = np.dtype(
  1035. [
  1036. ("Index", object),
  1037. ("Symbol", object),
  1038. ("Year", int),
  1039. ("Month", int),
  1040. ("Day", int),
  1041. ("Quantity", int),
  1042. ("Price", float),
  1043. ]
  1044. )
  1045. products = np.array(
  1046. [
  1047. ("SP500", "ADBE"),
  1048. ("SP500", "NVDA"),
  1049. ("SP500", "ORCL"),
  1050. ("NDQ100", "AAPL"),
  1051. ("NDQ100", "MSFT"),
  1052. ("NDQ100", "GOOG"),
  1053. ("FTSE", "DGE.L"),
  1054. ("FTSE", "TSCO.L"),
  1055. ("FTSE", "GSK.L"),
  1056. ],
  1057. dtype=[("Index", object), ("Symbol", object)],
  1058. )
  1059. items = np.empty(n, dtype=dtype)
  1060. iproduct = np.random.default_rng(2).integers(0, len(products), n)
  1061. items["Index"] = products["Index"][iproduct]
  1062. items["Symbol"] = products["Symbol"][iproduct]
  1063. dr = date_range(date(2000, 1, 1), date(2010, 12, 31))
  1064. dates = dr[np.random.default_rng(2).integers(0, len(dr), n)]
  1065. items["Year"] = dates.year
  1066. items["Month"] = dates.month
  1067. items["Day"] = dates.day
  1068. items["Price"] = np.random.default_rng(2).lognormal(4.0, 2.0, n)
  1069. df = DataFrame(items)
  1070. pivoted = df.pivot_table(
  1071. "Price",
  1072. index=["Month", "Day"],
  1073. columns=["Index", "Symbol", "Year"],
  1074. aggfunc="mean",
  1075. )
  1076. assert pivoted.columns.is_monotonic_increasing
  1077. def test_pivot_complex_aggfunc(self, data):
  1078. f = {"D": ["std"], "E": ["sum"]}
  1079. expected = data.groupby(["A", "B"]).agg(f).unstack("B")
  1080. result = data.pivot_table(index="A", columns="B", aggfunc=f)
  1081. tm.assert_frame_equal(result, expected)
  1082. def test_margins_no_values_no_cols(self, data):
  1083. # Regression test on pivot table: no values or cols passed.
  1084. result = data[["A", "B"]].pivot_table(
  1085. index=["A", "B"], aggfunc=len, margins=True
  1086. )
  1087. result_list = result.tolist()
  1088. assert sum(result_list[:-1]) == result_list[-1]
  1089. def test_margins_no_values_two_rows(self, data):
  1090. # Regression test on pivot table: no values passed but rows are a
  1091. # multi-index
  1092. result = data[["A", "B", "C"]].pivot_table(
  1093. index=["A", "B"], columns="C", aggfunc=len, margins=True
  1094. )
  1095. assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]
  1096. def test_margins_no_values_one_row_one_col(self, data):
  1097. # Regression test on pivot table: no values passed but row and col
  1098. # defined
  1099. result = data[["A", "B"]].pivot_table(
  1100. index="A", columns="B", aggfunc=len, margins=True
  1101. )
  1102. assert result.All.tolist() == [4.0, 7.0, 11.0]
  1103. def test_margins_no_values_two_row_two_cols(self, data):
  1104. # Regression test on pivot table: no values passed but rows and cols
  1105. # are multi-indexed
  1106. data["D"] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]
  1107. result = data[["A", "B", "C", "D"]].pivot_table(
  1108. index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True
  1109. )
  1110. assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]
  1111. @pytest.mark.parametrize("margin_name", ["foo", "one", 666, None, ["a", "b"]])
  1112. def test_pivot_table_with_margins_set_margin_name(self, margin_name, data):
  1113. # see gh-3335
  1114. msg = (
  1115. f'Conflicting name "{margin_name}" in margins|'
  1116. "margins_name argument must be a string"
  1117. )
  1118. with pytest.raises(ValueError, match=msg):
  1119. # multi-index index
  1120. pivot_table(
  1121. data,
  1122. values="D",
  1123. index=["A", "B"],
  1124. columns=["C"],
  1125. margins=True,
  1126. margins_name=margin_name,
  1127. )
  1128. with pytest.raises(ValueError, match=msg):
  1129. # multi-index column
  1130. pivot_table(
  1131. data,
  1132. values="D",
  1133. index=["C"],
  1134. columns=["A", "B"],
  1135. margins=True,
  1136. margins_name=margin_name,
  1137. )
  1138. with pytest.raises(ValueError, match=msg):
  1139. # non-multi-index index/column
  1140. pivot_table(
  1141. data,
  1142. values="D",
  1143. index=["A"],
  1144. columns=["B"],
  1145. margins=True,
  1146. margins_name=margin_name,
  1147. )
  1148. def test_pivot_timegrouper(self, using_array_manager):
  1149. df = DataFrame(
  1150. {
  1151. "Branch": "A A A A A A A B".split(),
  1152. "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(),
  1153. "Quantity": [1, 3, 5, 1, 8, 1, 9, 3],
  1154. "Date": [
  1155. datetime(2013, 1, 1),
  1156. datetime(2013, 1, 1),
  1157. datetime(2013, 10, 1),
  1158. datetime(2013, 10, 2),
  1159. datetime(2013, 10, 1),
  1160. datetime(2013, 10, 2),
  1161. datetime(2013, 12, 2),
  1162. datetime(2013, 12, 2),
  1163. ],
  1164. }
  1165. ).set_index("Date")
  1166. expected = DataFrame(
  1167. np.array([10, 18, 3], dtype="int64").reshape(1, 3),
  1168. index=pd.DatetimeIndex([datetime(2013, 12, 31)], freq="YE"),
  1169. columns="Carl Joe Mark".split(),
  1170. )
  1171. expected.index.name = "Date"
  1172. expected.columns.name = "Buyer"
  1173. result = pivot_table(
  1174. df,
  1175. index=Grouper(freq="YE"),
  1176. columns="Buyer",
  1177. values="Quantity",
  1178. aggfunc="sum",
  1179. )
  1180. tm.assert_frame_equal(result, expected)
  1181. result = pivot_table(
  1182. df,
  1183. index="Buyer",
  1184. columns=Grouper(freq="YE"),
  1185. values="Quantity",
  1186. aggfunc="sum",
  1187. )
  1188. tm.assert_frame_equal(result, expected.T)
  1189. expected = DataFrame(
  1190. np.array([1, np.nan, 3, 9, 18, np.nan]).reshape(2, 3),
  1191. index=pd.DatetimeIndex(
  1192. [datetime(2013, 1, 1), datetime(2013, 7, 1)], freq="6MS"
  1193. ),
  1194. columns="Carl Joe Mark".split(),
  1195. )
  1196. expected.index.name = "Date"
  1197. expected.columns.name = "Buyer"
  1198. if using_array_manager:
  1199. # INFO(ArrayManager) column without NaNs can preserve int dtype
  1200. expected["Carl"] = expected["Carl"].astype("int64")
  1201. result = pivot_table(
  1202. df,
  1203. index=Grouper(freq="6MS"),
  1204. columns="Buyer",
  1205. values="Quantity",
  1206. aggfunc="sum",
  1207. )
  1208. tm.assert_frame_equal(result, expected)
  1209. result = pivot_table(
  1210. df,
  1211. index="Buyer",
  1212. columns=Grouper(freq="6MS"),
  1213. values="Quantity",
  1214. aggfunc="sum",
  1215. )
  1216. tm.assert_frame_equal(result, expected.T)
  1217. # passing the name
  1218. df = df.reset_index()
  1219. result = pivot_table(
  1220. df,
  1221. index=Grouper(freq="6MS", key="Date"),
  1222. columns="Buyer",
  1223. values="Quantity",
  1224. aggfunc="sum",
  1225. )
  1226. tm.assert_frame_equal(result, expected)
  1227. result = pivot_table(
  1228. df,
  1229. index="Buyer",
  1230. columns=Grouper(freq="6MS", key="Date"),
  1231. values="Quantity",
  1232. aggfunc="sum",
  1233. )
  1234. tm.assert_frame_equal(result, expected.T)
  1235. msg = "'The grouper name foo is not found'"
  1236. with pytest.raises(KeyError, match=msg):
  1237. pivot_table(
  1238. df,
  1239. index=Grouper(freq="6MS", key="foo"),
  1240. columns="Buyer",
  1241. values="Quantity",
  1242. aggfunc="sum",
  1243. )
  1244. with pytest.raises(KeyError, match=msg):
  1245. pivot_table(
  1246. df,
  1247. index="Buyer",
  1248. columns=Grouper(freq="6MS", key="foo"),
  1249. values="Quantity",
  1250. aggfunc="sum",
  1251. )
  1252. # passing the level
  1253. df = df.set_index("Date")
  1254. result = pivot_table(
  1255. df,
  1256. index=Grouper(freq="6MS", level="Date"),
  1257. columns="Buyer",
  1258. values="Quantity",
  1259. aggfunc="sum",
  1260. )
  1261. tm.assert_frame_equal(result, expected)
  1262. result = pivot_table(
  1263. df,
  1264. index="Buyer",
  1265. columns=Grouper(freq="6MS", level="Date"),
  1266. values="Quantity",
  1267. aggfunc="sum",
  1268. )
  1269. tm.assert_frame_equal(result, expected.T)
  1270. msg = "The level foo is not valid"
  1271. with pytest.raises(ValueError, match=msg):
  1272. pivot_table(
  1273. df,
  1274. index=Grouper(freq="6MS", level="foo"),
  1275. columns="Buyer",
  1276. values="Quantity",
  1277. aggfunc="sum",
  1278. )
  1279. with pytest.raises(ValueError, match=msg):
  1280. pivot_table(
  1281. df,
  1282. index="Buyer",
  1283. columns=Grouper(freq="6MS", level="foo"),
  1284. values="Quantity",
  1285. aggfunc="sum",
  1286. )
  1287. def test_pivot_timegrouper_double(self):
  1288. # double grouper
  1289. df = DataFrame(
  1290. {
  1291. "Branch": "A A A A A A A B".split(),
  1292. "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(),
  1293. "Quantity": [1, 3, 5, 1, 8, 1, 9, 3],
  1294. "Date": [
  1295. datetime(2013, 11, 1, 13, 0),
  1296. datetime(2013, 9, 1, 13, 5),
  1297. datetime(2013, 10, 1, 20, 0),
  1298. datetime(2013, 10, 2, 10, 0),
  1299. datetime(2013, 11, 1, 20, 0),
  1300. datetime(2013, 10, 2, 10, 0),
  1301. datetime(2013, 10, 2, 12, 0),
  1302. datetime(2013, 12, 5, 14, 0),
  1303. ],
  1304. "PayDay": [
  1305. datetime(2013, 10, 4, 0, 0),
  1306. datetime(2013, 10, 15, 13, 5),
  1307. datetime(2013, 9, 5, 20, 0),
  1308. datetime(2013, 11, 2, 10, 0),
  1309. datetime(2013, 10, 7, 20, 0),
  1310. datetime(2013, 9, 5, 10, 0),
  1311. datetime(2013, 12, 30, 12, 0),
  1312. datetime(2013, 11, 20, 14, 0),
  1313. ],
  1314. }
  1315. )
  1316. result = pivot_table(
  1317. df,
  1318. index=Grouper(freq="ME", key="Date"),
  1319. columns=Grouper(freq="ME", key="PayDay"),
  1320. values="Quantity",
  1321. aggfunc="sum",
  1322. )
  1323. expected = DataFrame(
  1324. np.array(
  1325. [
  1326. np.nan,
  1327. 3,
  1328. np.nan,
  1329. np.nan,
  1330. 6,
  1331. np.nan,
  1332. 1,
  1333. 9,
  1334. np.nan,
  1335. 9,
  1336. np.nan,
  1337. np.nan,
  1338. np.nan,
  1339. np.nan,
  1340. 3,
  1341. np.nan,
  1342. ]
  1343. ).reshape(4, 4),
  1344. index=pd.DatetimeIndex(
  1345. [
  1346. datetime(2013, 9, 30),
  1347. datetime(2013, 10, 31),
  1348. datetime(2013, 11, 30),
  1349. datetime(2013, 12, 31),
  1350. ],
  1351. freq="ME",
  1352. ),
  1353. columns=pd.DatetimeIndex(
  1354. [
  1355. datetime(2013, 9, 30),
  1356. datetime(2013, 10, 31),
  1357. datetime(2013, 11, 30),
  1358. datetime(2013, 12, 31),
  1359. ],
  1360. freq="ME",
  1361. ),
  1362. )
  1363. expected.index.name = "Date"
  1364. expected.columns.name = "PayDay"
  1365. tm.assert_frame_equal(result, expected)
  1366. result = pivot_table(
  1367. df,
  1368. index=Grouper(freq="ME", key="PayDay"),
  1369. columns=Grouper(freq="ME", key="Date"),
  1370. values="Quantity",
  1371. aggfunc="sum",
  1372. )
  1373. tm.assert_frame_equal(result, expected.T)
  1374. tuples = [
  1375. (datetime(2013, 9, 30), datetime(2013, 10, 31)),
  1376. (datetime(2013, 10, 31), datetime(2013, 9, 30)),
  1377. (datetime(2013, 10, 31), datetime(2013, 11, 30)),
  1378. (datetime(2013, 10, 31), datetime(2013, 12, 31)),
  1379. (datetime(2013, 11, 30), datetime(2013, 10, 31)),
  1380. (datetime(2013, 12, 31), datetime(2013, 11, 30)),
  1381. ]
  1382. idx = MultiIndex.from_tuples(tuples, names=["Date", "PayDay"])
  1383. expected = DataFrame(
  1384. np.array(
  1385. [3, np.nan, 6, np.nan, 1, np.nan, 9, np.nan, 9, np.nan, np.nan, 3]
  1386. ).reshape(6, 2),
  1387. index=idx,
  1388. columns=["A", "B"],
  1389. )
  1390. expected.columns.name = "Branch"
  1391. result = pivot_table(
  1392. df,
  1393. index=[Grouper(freq="ME", key="Date"), Grouper(freq="ME", key="PayDay")],
  1394. columns=["Branch"],
  1395. values="Quantity",
  1396. aggfunc="sum",
  1397. )
  1398. tm.assert_frame_equal(result, expected)
  1399. result = pivot_table(
  1400. df,
  1401. index=["Branch"],
  1402. columns=[Grouper(freq="ME", key="Date"), Grouper(freq="ME", key="PayDay")],
  1403. values="Quantity",
  1404. aggfunc="sum",
  1405. )
  1406. tm.assert_frame_equal(result, expected.T)
  1407. def test_pivot_datetime_tz(self):
  1408. dates1 = pd.DatetimeIndex(
  1409. [
  1410. "2011-07-19 07:00:00",
  1411. "2011-07-19 08:00:00",
  1412. "2011-07-19 09:00:00",
  1413. "2011-07-19 07:00:00",
  1414. "2011-07-19 08:00:00",
  1415. "2011-07-19 09:00:00",
  1416. ],
  1417. dtype="M8[ns, US/Pacific]",
  1418. name="dt1",
  1419. )
  1420. dates2 = pd.DatetimeIndex(
  1421. [
  1422. "2013-01-01 15:00:00",
  1423. "2013-01-01 15:00:00",
  1424. "2013-01-01 15:00:00",
  1425. "2013-02-01 15:00:00",
  1426. "2013-02-01 15:00:00",
  1427. "2013-02-01 15:00:00",
  1428. ],
  1429. dtype="M8[ns, Asia/Tokyo]",
  1430. )
  1431. df = DataFrame(
  1432. {
  1433. "label": ["a", "a", "a", "b", "b", "b"],
  1434. "dt1": dates1,
  1435. "dt2": dates2,
  1436. "value1": np.arange(6, dtype="int64"),
  1437. "value2": [1, 2] * 3,
  1438. }
  1439. )
  1440. exp_idx = dates1[:3]
  1441. exp_col1 = Index(["value1", "value1"])
  1442. exp_col2 = Index(["a", "b"], name="label")
  1443. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  1444. expected = DataFrame(
  1445. [[0.0, 3.0], [1.0, 4.0], [2.0, 5.0]], index=exp_idx, columns=exp_col
  1446. )
  1447. result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"])
  1448. tm.assert_frame_equal(result, expected)
  1449. exp_col1 = Index(["sum", "sum", "sum", "sum", "mean", "mean", "mean", "mean"])
  1450. exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2)
  1451. exp_col3 = pd.DatetimeIndex(
  1452. ["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4,
  1453. dtype="M8[ns, Asia/Tokyo]",
  1454. name="dt2",
  1455. )
  1456. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
  1457. expected1 = DataFrame(
  1458. np.array(
  1459. [
  1460. [
  1461. 0,
  1462. 3,
  1463. 1,
  1464. 2,
  1465. ],
  1466. [1, 4, 2, 1],
  1467. [2, 5, 1, 2],
  1468. ],
  1469. dtype="int64",
  1470. ),
  1471. index=exp_idx,
  1472. columns=exp_col[:4],
  1473. )
  1474. expected2 = DataFrame(
  1475. np.array(
  1476. [
  1477. [0.0, 3.0, 1.0, 2.0],
  1478. [1.0, 4.0, 2.0, 1.0],
  1479. [2.0, 5.0, 1.0, 2.0],
  1480. ],
  1481. ),
  1482. index=exp_idx,
  1483. columns=exp_col[4:],
  1484. )
  1485. expected = concat([expected1, expected2], axis=1)
  1486. result = pivot_table(
  1487. df,
  1488. index=["dt1"],
  1489. columns=["dt2"],
  1490. values=["value1", "value2"],
  1491. aggfunc=["sum", "mean"],
  1492. )
  1493. tm.assert_frame_equal(result, expected)
  1494. def test_pivot_dtaccessor(self):
  1495. # GH 8103
  1496. dates1 = pd.DatetimeIndex(
  1497. [
  1498. "2011-07-19 07:00:00",
  1499. "2011-07-19 08:00:00",
  1500. "2011-07-19 09:00:00",
  1501. "2011-07-19 07:00:00",
  1502. "2011-07-19 08:00:00",
  1503. "2011-07-19 09:00:00",
  1504. ]
  1505. )
  1506. dates2 = pd.DatetimeIndex(
  1507. [
  1508. "2013-01-01 15:00:00",
  1509. "2013-01-01 15:00:00",
  1510. "2013-01-01 15:00:00",
  1511. "2013-02-01 15:00:00",
  1512. "2013-02-01 15:00:00",
  1513. "2013-02-01 15:00:00",
  1514. ]
  1515. )
  1516. df = DataFrame(
  1517. {
  1518. "label": ["a", "a", "a", "b", "b", "b"],
  1519. "dt1": dates1,
  1520. "dt2": dates2,
  1521. "value1": np.arange(6, dtype="int64"),
  1522. "value2": [1, 2] * 3,
  1523. }
  1524. )
  1525. result = pivot_table(
  1526. df, index="label", columns=df["dt1"].dt.hour, values="value1"
  1527. )
  1528. exp_idx = Index(["a", "b"], name="label")
  1529. expected = DataFrame(
  1530. {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
  1531. index=exp_idx,
  1532. columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
  1533. )
  1534. tm.assert_frame_equal(result, expected)
  1535. result = pivot_table(
  1536. df, index=df["dt2"].dt.month, columns=df["dt1"].dt.hour, values="value1"
  1537. )
  1538. expected = DataFrame(
  1539. {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
  1540. index=Index([1, 2], dtype=np.int32, name="dt2"),
  1541. columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
  1542. )
  1543. tm.assert_frame_equal(result, expected)
  1544. result = pivot_table(
  1545. df,
  1546. index=df["dt2"].dt.year.values,
  1547. columns=[df["dt1"].dt.hour, df["dt2"].dt.month],
  1548. values="value1",
  1549. )
  1550. exp_col = MultiIndex.from_arrays(
  1551. [
  1552. np.array([7, 7, 8, 8, 9, 9], dtype=np.int32),
  1553. np.array([1, 2] * 3, dtype=np.int32),
  1554. ],
  1555. names=["dt1", "dt2"],
  1556. )
  1557. expected = DataFrame(
  1558. np.array([[0.0, 3.0, 1.0, 4.0, 2.0, 5.0]]),
  1559. index=Index([2013], dtype=np.int32),
  1560. columns=exp_col,
  1561. )
  1562. tm.assert_frame_equal(result, expected)
  1563. result = pivot_table(
  1564. df,
  1565. index=np.array(["X", "X", "X", "X", "Y", "Y"]),
  1566. columns=[df["dt1"].dt.hour, df["dt2"].dt.month],
  1567. values="value1",
  1568. )
  1569. expected = DataFrame(
  1570. np.array(
  1571. [[0, 3, 1, np.nan, 2, np.nan], [np.nan, np.nan, np.nan, 4, np.nan, 5]]
  1572. ),
  1573. index=["X", "Y"],
  1574. columns=exp_col,
  1575. )
  1576. tm.assert_frame_equal(result, expected)
  1577. def test_daily(self):
  1578. rng = date_range("1/1/2000", "12/31/2004", freq="D")
  1579. ts = Series(np.arange(len(rng)), index=rng)
  1580. result = pivot_table(
  1581. DataFrame(ts), index=ts.index.year, columns=ts.index.dayofyear
  1582. )
  1583. result.columns = result.columns.droplevel(0)
  1584. doy = np.asarray(ts.index.dayofyear)
  1585. expected = {}
  1586. for y in ts.index.year.unique().values:
  1587. mask = ts.index.year == y
  1588. expected[y] = Series(ts.values[mask], index=doy[mask])
  1589. expected = DataFrame(expected, dtype=float).T
  1590. tm.assert_frame_equal(result, expected)
  1591. def test_monthly(self):
  1592. rng = date_range("1/1/2000", "12/31/2004", freq="ME")
  1593. ts = Series(np.arange(len(rng)), index=rng)
  1594. result = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month)
  1595. result.columns = result.columns.droplevel(0)
  1596. month = np.asarray(ts.index.month)
  1597. expected = {}
  1598. for y in ts.index.year.unique().values:
  1599. mask = ts.index.year == y
  1600. expected[y] = Series(ts.values[mask], index=month[mask])
  1601. expected = DataFrame(expected, dtype=float).T
  1602. tm.assert_frame_equal(result, expected)
  1603. def test_pivot_table_with_iterator_values(self, data):
  1604. # GH 12017
  1605. aggs = {"D": "sum", "E": "mean"}
  1606. pivot_values_list = pivot_table(
  1607. data, index=["A"], values=list(aggs.keys()), aggfunc=aggs
  1608. )
  1609. pivot_values_keys = pivot_table(
  1610. data, index=["A"], values=aggs.keys(), aggfunc=aggs
  1611. )
  1612. tm.assert_frame_equal(pivot_values_keys, pivot_values_list)
  1613. agg_values_gen = (value for value in aggs)
  1614. pivot_values_gen = pivot_table(
  1615. data, index=["A"], values=agg_values_gen, aggfunc=aggs
  1616. )
  1617. tm.assert_frame_equal(pivot_values_gen, pivot_values_list)
  1618. def test_pivot_table_margins_name_with_aggfunc_list(self):
  1619. # GH 13354
  1620. margins_name = "Weekly"
  1621. costs = DataFrame(
  1622. {
  1623. "item": ["bacon", "cheese", "bacon", "cheese"],
  1624. "cost": [2.5, 4.5, 3.2, 3.3],
  1625. "day": ["ME", "ME", "T", "T"],
  1626. }
  1627. )
  1628. table = costs.pivot_table(
  1629. index="item",
  1630. columns="day",
  1631. margins=True,
  1632. margins_name=margins_name,
  1633. aggfunc=["mean", "max"],
  1634. )
  1635. ix = Index(["bacon", "cheese", margins_name], name="item")
  1636. tups = [
  1637. ("mean", "cost", "ME"),
  1638. ("mean", "cost", "T"),
  1639. ("mean", "cost", margins_name),
  1640. ("max", "cost", "ME"),
  1641. ("max", "cost", "T"),
  1642. ("max", "cost", margins_name),
  1643. ]
  1644. cols = MultiIndex.from_tuples(tups, names=[None, None, "day"])
  1645. expected = DataFrame(table.values, index=ix, columns=cols)
  1646. tm.assert_frame_equal(table, expected)
  1647. def test_categorical_margins(self, observed):
  1648. # GH 10989
  1649. df = DataFrame(
  1650. {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
  1651. )
  1652. expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
  1653. expected.index = Index([0, 1, "All"], name="y")
  1654. expected.columns = Index([0, 1, "All"], name="z")
  1655. table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
  1656. tm.assert_frame_equal(table, expected)
  1657. def test_categorical_margins_category(self, observed):
  1658. df = DataFrame(
  1659. {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
  1660. )
  1661. expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
  1662. expected.index = Index([0, 1, "All"], name="y")
  1663. expected.columns = Index([0, 1, "All"], name="z")
  1664. df.y = df.y.astype("category")
  1665. df.z = df.z.astype("category")
  1666. msg = "The default value of observed=False is deprecated"
  1667. with tm.assert_produces_warning(FutureWarning, match=msg):
  1668. table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
  1669. tm.assert_frame_equal(table, expected)
  1670. def test_margins_casted_to_float(self):
  1671. # GH 24893
  1672. df = DataFrame(
  1673. {
  1674. "A": [2, 4, 6, 8],
  1675. "B": [1, 4, 5, 8],
  1676. "C": [1, 3, 4, 6],
  1677. "D": ["X", "X", "Y", "Y"],
  1678. }
  1679. )
  1680. result = pivot_table(df, index="D", margins=True)
  1681. expected = DataFrame(
  1682. {"A": [3.0, 7.0, 5], "B": [2.5, 6.5, 4.5], "C": [2.0, 5.0, 3.5]},
  1683. index=Index(["X", "Y", "All"], name="D"),
  1684. )
  1685. tm.assert_frame_equal(result, expected)
  1686. def test_pivot_with_categorical(self, observed, ordered):
  1687. # gh-21370
  1688. idx = [np.nan, "low", "high", "low", np.nan]
  1689. col = [np.nan, "A", "B", np.nan, "A"]
  1690. df = DataFrame(
  1691. {
  1692. "In": Categorical(idx, categories=["low", "high"], ordered=ordered),
  1693. "Col": Categorical(col, categories=["A", "B"], ordered=ordered),
  1694. "Val": range(1, 6),
  1695. }
  1696. )
  1697. # case with index/columns/value
  1698. result = df.pivot_table(
  1699. index="In", columns="Col", values="Val", observed=observed
  1700. )
  1701. expected_cols = pd.CategoricalIndex(["A", "B"], ordered=ordered, name="Col")
  1702. expected = DataFrame(data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols)
  1703. expected.index = Index(
  1704. Categorical(["low", "high"], categories=["low", "high"], ordered=ordered),
  1705. name="In",
  1706. )
  1707. tm.assert_frame_equal(result, expected)
  1708. # case with columns/value
  1709. result = df.pivot_table(columns="Col", values="Val", observed=observed)
  1710. expected = DataFrame(
  1711. data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"])
  1712. )
  1713. tm.assert_frame_equal(result, expected)
  1714. def test_categorical_aggfunc(self, observed):
  1715. # GH 9534
  1716. df = DataFrame(
  1717. {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
  1718. )
  1719. df["C1"] = df["C1"].astype("category")
  1720. msg = "The default value of observed=False is deprecated"
  1721. with tm.assert_produces_warning(FutureWarning, match=msg):
  1722. result = df.pivot_table(
  1723. "V", index="C1", columns="C2", dropna=observed, aggfunc="count"
  1724. )
  1725. expected_index = pd.CategoricalIndex(
  1726. ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
  1727. )
  1728. expected_columns = Index(["a", "b"], name="C2")
  1729. expected_data = np.array([[1, 0], [1, 0], [0, 2]], dtype=np.int64)
  1730. expected = DataFrame(
  1731. expected_data, index=expected_index, columns=expected_columns
  1732. )
  1733. tm.assert_frame_equal(result, expected)
  1734. def test_categorical_pivot_index_ordering(self, observed):
  1735. # GH 8731
  1736. df = DataFrame(
  1737. {
  1738. "Sales": [100, 120, 220],
  1739. "Month": ["January", "January", "January"],
  1740. "Year": [2013, 2014, 2013],
  1741. }
  1742. )
  1743. months = [
  1744. "January",
  1745. "February",
  1746. "March",
  1747. "April",
  1748. "May",
  1749. "June",
  1750. "July",
  1751. "August",
  1752. "September",
  1753. "October",
  1754. "November",
  1755. "December",
  1756. ]
  1757. df["Month"] = df["Month"].astype("category").cat.set_categories(months)
  1758. result = df.pivot_table(
  1759. values="Sales",
  1760. index="Month",
  1761. columns="Year",
  1762. observed=observed,
  1763. aggfunc="sum",
  1764. )
  1765. expected_columns = Index([2013, 2014], name="Year", dtype="int64")
  1766. expected_index = pd.CategoricalIndex(
  1767. months, categories=months, ordered=False, name="Month"
  1768. )
  1769. expected_data = [[320, 120]] + [[0, 0]] * 11
  1770. expected = DataFrame(
  1771. expected_data, index=expected_index, columns=expected_columns
  1772. )
  1773. if observed:
  1774. expected = expected.loc[["January"]]
  1775. tm.assert_frame_equal(result, expected)
  1776. def test_pivot_table_not_series(self):
  1777. # GH 4386
  1778. # pivot_table always returns a DataFrame
  1779. # when values is not list like and columns is None
  1780. # and aggfunc is not instance of list
  1781. df = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"], "col3": [1, 3, 9]})
  1782. result = df.pivot_table("col1", index=["col3", "col2"], aggfunc="sum")
  1783. m = MultiIndex.from_arrays([[1, 3, 9], ["C", "D", "E"]], names=["col3", "col2"])
  1784. expected = DataFrame([3, 4, 5], index=m, columns=["col1"])
  1785. tm.assert_frame_equal(result, expected)
  1786. result = df.pivot_table("col1", index="col3", columns="col2", aggfunc="sum")
  1787. expected = DataFrame(
  1788. [[3, np.nan, np.nan], [np.nan, 4, np.nan], [np.nan, np.nan, 5]],
  1789. index=Index([1, 3, 9], name="col3"),
  1790. columns=Index(["C", "D", "E"], name="col2"),
  1791. )
  1792. tm.assert_frame_equal(result, expected)
  1793. result = df.pivot_table("col1", index="col3", aggfunc=["sum"])
  1794. m = MultiIndex.from_arrays([["sum"], ["col1"]])
  1795. expected = DataFrame([3, 4, 5], index=Index([1, 3, 9], name="col3"), columns=m)
  1796. tm.assert_frame_equal(result, expected)
  1797. def test_pivot_margins_name_unicode(self):
  1798. # issue #13292
  1799. greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae"
  1800. frame = DataFrame({"foo": [1, 2, 3]}, columns=Index(["foo"], dtype=object))
  1801. table = pivot_table(
  1802. frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek
  1803. )
  1804. index = Index([1, 2, 3, greek], dtype="object", name="foo")
  1805. expected = DataFrame(index=index, columns=[])
  1806. tm.assert_frame_equal(table, expected)
  1807. def test_pivot_string_as_func(self):
  1808. # GH #18713
  1809. # for correctness purposes
  1810. data = DataFrame(
  1811. {
  1812. "A": [
  1813. "foo",
  1814. "foo",
  1815. "foo",
  1816. "foo",
  1817. "bar",
  1818. "bar",
  1819. "bar",
  1820. "bar",
  1821. "foo",
  1822. "foo",
  1823. "foo",
  1824. ],
  1825. "B": [
  1826. "one",
  1827. "one",
  1828. "one",
  1829. "two",
  1830. "one",
  1831. "one",
  1832. "one",
  1833. "two",
  1834. "two",
  1835. "two",
  1836. "one",
  1837. ],
  1838. "C": range(11),
  1839. }
  1840. )
  1841. result = pivot_table(data, index="A", columns="B", aggfunc="sum")
  1842. mi = MultiIndex(
  1843. levels=[["C"], ["one", "two"]], codes=[[0, 0], [0, 1]], names=[None, "B"]
  1844. )
  1845. expected = DataFrame(
  1846. {("C", "one"): {"bar": 15, "foo": 13}, ("C", "two"): {"bar": 7, "foo": 20}},
  1847. columns=mi,
  1848. ).rename_axis("A")
  1849. tm.assert_frame_equal(result, expected)
  1850. result = pivot_table(data, index="A", columns="B", aggfunc=["sum", "mean"])
  1851. mi = MultiIndex(
  1852. levels=[["sum", "mean"], ["C"], ["one", "two"]],
  1853. codes=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]],
  1854. names=[None, None, "B"],
  1855. )
  1856. expected = DataFrame(
  1857. {
  1858. ("mean", "C", "one"): {"bar": 5.0, "foo": 3.25},
  1859. ("mean", "C", "two"): {"bar": 7.0, "foo": 6.666666666666667},
  1860. ("sum", "C", "one"): {"bar": 15, "foo": 13},
  1861. ("sum", "C", "two"): {"bar": 7, "foo": 20},
  1862. },
  1863. columns=mi,
  1864. ).rename_axis("A")
  1865. tm.assert_frame_equal(result, expected)
  1866. @pytest.mark.parametrize(
  1867. "f, f_numpy",
  1868. [
  1869. ("sum", np.sum),
  1870. ("mean", np.mean),
  1871. ("std", np.std),
  1872. (["sum", "mean"], [np.sum, np.mean]),
  1873. (["sum", "std"], [np.sum, np.std]),
  1874. (["std", "mean"], [np.std, np.mean]),
  1875. ],
  1876. )
  1877. def test_pivot_string_func_vs_func(self, f, f_numpy, data):
  1878. # GH #18713
  1879. # for consistency purposes
  1880. data = data.drop(columns="C")
  1881. result = pivot_table(data, index="A", columns="B", aggfunc=f)
  1882. ops = "|".join(f) if isinstance(f, list) else f
  1883. msg = f"using DataFrameGroupBy.[{ops}]"
  1884. with tm.assert_produces_warning(FutureWarning, match=msg):
  1885. expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy)
  1886. tm.assert_frame_equal(result, expected)
  1887. @pytest.mark.slow
  1888. def test_pivot_number_of_levels_larger_than_int32(self, monkeypatch):
  1889. # GH 20601
  1890. # GH 26314: Change ValueError to PerformanceWarning
  1891. class MockUnstacker(reshape_lib._Unstacker):
  1892. def __init__(self, *args, **kwargs) -> None:
  1893. # __init__ will raise the warning
  1894. super().__init__(*args, **kwargs)
  1895. raise Exception("Don't compute final result.")
  1896. with monkeypatch.context() as m:
  1897. m.setattr(reshape_lib, "_Unstacker", MockUnstacker)
  1898. df = DataFrame(
  1899. {"ind1": np.arange(2**16), "ind2": np.arange(2**16), "count": 0}
  1900. )
  1901. msg = "The following operation may generate"
  1902. with tm.assert_produces_warning(PerformanceWarning, match=msg):
  1903. with pytest.raises(Exception, match="Don't compute final result."):
  1904. df.pivot_table(
  1905. index="ind1", columns="ind2", values="count", aggfunc="count"
  1906. )
  1907. def test_pivot_table_aggfunc_dropna(self, dropna):
  1908. # GH 22159
  1909. df = DataFrame(
  1910. {
  1911. "fruit": ["apple", "peach", "apple"],
  1912. "size": [1, 1, 2],
  1913. "taste": [7, 6, 6],
  1914. }
  1915. )
  1916. def ret_one(x):
  1917. return 1
  1918. def ret_sum(x):
  1919. return sum(x)
  1920. def ret_none(x):
  1921. return np.nan
  1922. result = pivot_table(
  1923. df, columns="fruit", aggfunc=[ret_sum, ret_none, ret_one], dropna=dropna
  1924. )
  1925. data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]]
  1926. col = MultiIndex.from_product(
  1927. [["ret_sum", "ret_none", "ret_one"], ["apple", "peach"]],
  1928. names=[None, "fruit"],
  1929. )
  1930. expected = DataFrame(data, index=["size", "taste"], columns=col)
  1931. if dropna:
  1932. expected = expected.dropna(axis="columns")
  1933. tm.assert_frame_equal(result, expected)
  1934. def test_pivot_table_aggfunc_scalar_dropna(self, dropna):
  1935. # GH 22159
  1936. df = DataFrame(
  1937. {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]}
  1938. )
  1939. result = pivot_table(df, columns="A", aggfunc="mean", dropna=dropna)
  1940. data = [[2.5, np.nan], [1, np.nan]]
  1941. col = Index(["one", "two"], name="A")
  1942. expected = DataFrame(data, index=["x", "y"], columns=col)
  1943. if dropna:
  1944. expected = expected.dropna(axis="columns")
  1945. tm.assert_frame_equal(result, expected)
  1946. @pytest.mark.parametrize("margins", [True, False])
  1947. def test_pivot_table_empty_aggfunc(self, margins):
  1948. # GH 9186 & GH 13483 & GH 49240
  1949. df = DataFrame(
  1950. {
  1951. "A": [2, 2, 3, 3, 2],
  1952. "id": [5, 6, 7, 8, 9],
  1953. "C": ["p", "q", "q", "p", "q"],
  1954. "D": [None, None, None, None, None],
  1955. }
  1956. )
  1957. result = df.pivot_table(
  1958. index="A", columns="D", values="id", aggfunc=np.size, margins=margins
  1959. )
  1960. exp_cols = Index([], name="D")
  1961. expected = DataFrame(index=Index([], dtype="int64", name="A"), columns=exp_cols)
  1962. tm.assert_frame_equal(result, expected)
  1963. def test_pivot_table_no_column_raises(self):
  1964. # GH 10326
  1965. def agg(arr):
  1966. return np.mean(arr)
  1967. df = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]})
  1968. with pytest.raises(KeyError, match="notpresent"):
  1969. df.pivot_table("notpresent", "X", "Y", aggfunc=agg)
  1970. def test_pivot_table_multiindex_columns_doctest_case(self):
  1971. # The relevant characteristic is that the call
  1972. # to maybe_downcast_to_dtype(agged[v], data[v].dtype) in
  1973. # __internal_pivot_table has `agged[v]` a DataFrame instead of Series,
  1974. # In this case this is because agged.columns is a MultiIndex and 'v'
  1975. # is only indexing on its first level.
  1976. df = DataFrame(
  1977. {
  1978. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  1979. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  1980. "C": [
  1981. "small",
  1982. "large",
  1983. "large",
  1984. "small",
  1985. "small",
  1986. "large",
  1987. "small",
  1988. "small",
  1989. "large",
  1990. ],
  1991. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  1992. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  1993. }
  1994. )
  1995. table = pivot_table(
  1996. df,
  1997. values=["D", "E"],
  1998. index=["A", "C"],
  1999. aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
  2000. )
  2001. cols = MultiIndex.from_tuples(
  2002. [("D", "mean"), ("E", "max"), ("E", "mean"), ("E", "min")]
  2003. )
  2004. index = MultiIndex.from_tuples(
  2005. [("bar", "large"), ("bar", "small"), ("foo", "large"), ("foo", "small")],
  2006. names=["A", "C"],
  2007. )
  2008. vals = np.array(
  2009. [
  2010. [5.5, 9.0, 7.5, 6.0],
  2011. [5.5, 9.0, 8.5, 8.0],
  2012. [2.0, 5.0, 4.5, 4.0],
  2013. [2.33333333, 6.0, 4.33333333, 2.0],
  2014. ]
  2015. )
  2016. expected = DataFrame(vals, columns=cols, index=index)
  2017. expected[("E", "min")] = expected[("E", "min")].astype(np.int64)
  2018. expected[("E", "max")] = expected[("E", "max")].astype(np.int64)
  2019. tm.assert_frame_equal(table, expected)
  2020. def test_pivot_table_sort_false(self):
  2021. # GH#39143
  2022. df = DataFrame(
  2023. {
  2024. "a": ["d1", "d4", "d3"],
  2025. "col": ["a", "b", "c"],
  2026. "num": [23, 21, 34],
  2027. "year": ["2018", "2018", "2019"],
  2028. }
  2029. )
  2030. result = df.pivot_table(
  2031. index=["a", "col"], columns="year", values="num", aggfunc="sum", sort=False
  2032. )
  2033. expected = DataFrame(
  2034. [[23, np.nan], [21, np.nan], [np.nan, 34]],
  2035. columns=Index(["2018", "2019"], name="year"),
  2036. index=MultiIndex.from_arrays(
  2037. [["d1", "d4", "d3"], ["a", "b", "c"]], names=["a", "col"]
  2038. ),
  2039. )
  2040. tm.assert_frame_equal(result, expected)
  2041. def test_pivot_table_nullable_margins(self):
  2042. # GH#48681
  2043. df = DataFrame(
  2044. {"a": "A", "b": [1, 2], "sales": Series([10, 11], dtype="Int64")}
  2045. )
  2046. result = df.pivot_table(index="b", columns="a", margins=True, aggfunc="sum")
  2047. expected = DataFrame(
  2048. [[10, 10], [11, 11], [21, 21]],
  2049. index=Index([1, 2, "All"], name="b"),
  2050. columns=MultiIndex.from_tuples(
  2051. [("sales", "A"), ("sales", "All")], names=[None, "a"]
  2052. ),
  2053. dtype="Int64",
  2054. )
  2055. tm.assert_frame_equal(result, expected)
  2056. def test_pivot_table_sort_false_with_multiple_values(self):
  2057. df = DataFrame(
  2058. {
  2059. "firstname": ["John", "Michael"],
  2060. "lastname": ["Foo", "Bar"],
  2061. "height": [173, 182],
  2062. "age": [47, 33],
  2063. }
  2064. )
  2065. result = df.pivot_table(
  2066. index=["lastname", "firstname"], values=["height", "age"], sort=False
  2067. )
  2068. expected = DataFrame(
  2069. [[173.0, 47.0], [182.0, 33.0]],
  2070. columns=["height", "age"],
  2071. index=MultiIndex.from_tuples(
  2072. [("Foo", "John"), ("Bar", "Michael")],
  2073. names=["lastname", "firstname"],
  2074. ),
  2075. )
  2076. tm.assert_frame_equal(result, expected)
  2077. def test_pivot_table_with_margins_and_numeric_columns(self):
  2078. # GH 26568
  2079. df = DataFrame([["a", "x", 1], ["a", "y", 2], ["b", "y", 3], ["b", "z", 4]])
  2080. df.columns = [10, 20, 30]
  2081. result = df.pivot_table(
  2082. index=10, columns=20, values=30, aggfunc="sum", fill_value=0, margins=True
  2083. )
  2084. expected = DataFrame([[1, 2, 0, 3], [0, 3, 4, 7], [1, 5, 4, 10]])
  2085. expected.columns = ["x", "y", "z", "All"]
  2086. expected.index = ["a", "b", "All"]
  2087. expected.columns.name = 20
  2088. expected.index.name = 10
  2089. tm.assert_frame_equal(result, expected)
  2090. @pytest.mark.parametrize("dropna", [True, False])
  2091. def test_pivot_ea_dtype_dropna(self, dropna):
  2092. # GH#47477
  2093. df = DataFrame({"x": "a", "y": "b", "age": Series([20, 40], dtype="Int64")})
  2094. result = df.pivot_table(
  2095. index="x", columns="y", values="age", aggfunc="mean", dropna=dropna
  2096. )
  2097. expected = DataFrame(
  2098. [[30]],
  2099. index=Index(["a"], name="x"),
  2100. columns=Index(["b"], name="y"),
  2101. dtype="Float64",
  2102. )
  2103. tm.assert_frame_equal(result, expected)
  2104. def test_pivot_table_datetime_warning(self):
  2105. # GH#48683
  2106. df = DataFrame(
  2107. {
  2108. "a": "A",
  2109. "b": [1, 2],
  2110. "date": pd.Timestamp("2019-12-31"),
  2111. "sales": [10.0, 11],
  2112. }
  2113. )
  2114. with tm.assert_produces_warning(None):
  2115. result = df.pivot_table(
  2116. index=["b", "date"], columns="a", margins=True, aggfunc="sum"
  2117. )
  2118. expected = DataFrame(
  2119. [[10.0, 10.0], [11.0, 11.0], [21.0, 21.0]],
  2120. index=MultiIndex.from_arrays(
  2121. [
  2122. Index([1, 2, "All"], name="b"),
  2123. Index(
  2124. [pd.Timestamp("2019-12-31"), pd.Timestamp("2019-12-31"), ""],
  2125. dtype=object,
  2126. name="date",
  2127. ),
  2128. ]
  2129. ),
  2130. columns=MultiIndex.from_tuples(
  2131. [("sales", "A"), ("sales", "All")], names=[None, "a"]
  2132. ),
  2133. )
  2134. tm.assert_frame_equal(result, expected)
  2135. def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager):
  2136. # GH 50342
  2137. df = DataFrame(
  2138. {
  2139. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  2140. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  2141. "C": [
  2142. "small",
  2143. "large",
  2144. "large",
  2145. "small",
  2146. "small",
  2147. "large",
  2148. "small",
  2149. "small",
  2150. "large",
  2151. ],
  2152. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  2153. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  2154. ("col5",): [
  2155. "foo",
  2156. "foo",
  2157. "foo",
  2158. "foo",
  2159. "foo",
  2160. "bar",
  2161. "bar",
  2162. "bar",
  2163. "bar",
  2164. ],
  2165. ("col6", 6): [
  2166. "one",
  2167. "one",
  2168. "one",
  2169. "two",
  2170. "two",
  2171. "one",
  2172. "one",
  2173. "two",
  2174. "two",
  2175. ],
  2176. (7, "seven"): [
  2177. "small",
  2178. "large",
  2179. "large",
  2180. "small",
  2181. "small",
  2182. "large",
  2183. "small",
  2184. "small",
  2185. "large",
  2186. ],
  2187. }
  2188. )
  2189. result = pivot_table(
  2190. df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc="sum"
  2191. )
  2192. expected = DataFrame(
  2193. [[4.0, 5.0], [7.0, 6.0], [4.0, 1.0], [np.nan, 6.0]],
  2194. columns=Index(["large", "small"], name=(7, "seven")),
  2195. index=MultiIndex.from_arrays(
  2196. [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"]
  2197. ),
  2198. )
  2199. if using_array_manager:
  2200. # INFO(ArrayManager) column without NaNs can preserve int dtype
  2201. expected["small"] = expected["small"].astype("int64")
  2202. tm.assert_frame_equal(result, expected)
  2203. def test_pivot_table_aggfunc_nunique_with_different_values(self):
  2204. test = DataFrame(
  2205. {
  2206. "a": range(10),
  2207. "b": range(10),
  2208. "c": range(10),
  2209. "d": range(10),
  2210. }
  2211. )
  2212. columnval = MultiIndex.from_arrays(
  2213. [
  2214. ["nunique" for i in range(10)],
  2215. ["c" for i in range(10)],
  2216. range(10),
  2217. ],
  2218. names=(None, None, "b"),
  2219. )
  2220. nparr = np.full((10, 10), np.nan)
  2221. np.fill_diagonal(nparr, 1.0)
  2222. expected = DataFrame(nparr, index=Index(range(10), name="a"), columns=columnval)
  2223. result = test.pivot_table(
  2224. index=[
  2225. "a",
  2226. ],
  2227. columns=[
  2228. "b",
  2229. ],
  2230. values=[
  2231. "c",
  2232. ],
  2233. aggfunc=["nunique"],
  2234. )
  2235. tm.assert_frame_equal(result, expected)
  2236. class TestPivot:
  2237. def test_pivot(self):
  2238. data = {
  2239. "index": ["A", "B", "C", "C", "B", "A"],
  2240. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  2241. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  2242. }
  2243. frame = DataFrame(data)
  2244. pivoted = frame.pivot(index="index", columns="columns", values="values")
  2245. expected = DataFrame(
  2246. {
  2247. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  2248. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  2249. }
  2250. )
  2251. expected.index.name, expected.columns.name = "index", "columns"
  2252. tm.assert_frame_equal(pivoted, expected)
  2253. # name tracking
  2254. assert pivoted.index.name == "index"
  2255. assert pivoted.columns.name == "columns"
  2256. # don't specify values
  2257. pivoted = frame.pivot(index="index", columns="columns")
  2258. assert pivoted.index.name == "index"
  2259. assert pivoted.columns.names == (None, "columns")
  2260. def test_pivot_duplicates(self):
  2261. data = DataFrame(
  2262. {
  2263. "a": ["bar", "bar", "foo", "foo", "foo"],
  2264. "b": ["one", "two", "one", "one", "two"],
  2265. "c": [1.0, 2.0, 3.0, 3.0, 4.0],
  2266. }
  2267. )
  2268. with pytest.raises(ValueError, match="duplicate entries"):
  2269. data.pivot(index="a", columns="b", values="c")
  2270. def test_pivot_empty(self):
  2271. df = DataFrame(columns=["a", "b", "c"])
  2272. result = df.pivot(index="a", columns="b", values="c")
  2273. expected = DataFrame(index=[], columns=[])
  2274. tm.assert_frame_equal(result, expected, check_names=False)
  2275. def test_pivot_integer_bug(self, any_string_dtype):
  2276. df = DataFrame(
  2277. data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
  2278. )
  2279. result = df.pivot(index=1, columns=0, values=2)
  2280. expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
  2281. if any_string_dtype == "object":
  2282. expected_columns = expected_columns.astype("str")
  2283. tm.assert_index_equal(result.columns, expected_columns)
  2284. def test_pivot_index_none(self):
  2285. # GH#3962
  2286. data = {
  2287. "index": ["A", "B", "C", "C", "B", "A"],
  2288. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  2289. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  2290. }
  2291. frame = DataFrame(data).set_index("index")
  2292. result = frame.pivot(columns="columns", values="values")
  2293. expected = DataFrame(
  2294. {
  2295. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  2296. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  2297. }
  2298. )
  2299. expected.index.name, expected.columns.name = "index", "columns"
  2300. tm.assert_frame_equal(result, expected)
  2301. # omit values
  2302. result = frame.pivot(columns="columns")
  2303. expected.columns = MultiIndex.from_tuples(
  2304. [("values", "One"), ("values", "Two")], names=[None, "columns"]
  2305. )
  2306. expected.index.name = "index"
  2307. tm.assert_frame_equal(result, expected, check_names=False)
  2308. assert result.index.name == "index"
  2309. assert result.columns.names == (None, "columns")
  2310. expected.columns = expected.columns.droplevel(0)
  2311. result = frame.pivot(columns="columns", values="values")
  2312. expected.columns.name = "columns"
  2313. tm.assert_frame_equal(result, expected)
  2314. def test_pivot_index_list_values_none_immutable_args(self):
  2315. # GH37635
  2316. df = DataFrame(
  2317. {
  2318. "lev1": [1, 1, 1, 2, 2, 2],
  2319. "lev2": [1, 1, 2, 1, 1, 2],
  2320. "lev3": [1, 2, 1, 2, 1, 2],
  2321. "lev4": [1, 2, 3, 4, 5, 6],
  2322. "values": [0, 1, 2, 3, 4, 5],
  2323. }
  2324. )
  2325. index = ["lev1", "lev2"]
  2326. columns = ["lev3"]
  2327. result = df.pivot(index=index, columns=columns)
  2328. expected = DataFrame(
  2329. np.array(
  2330. [
  2331. [1.0, 2.0, 0.0, 1.0],
  2332. [3.0, np.nan, 2.0, np.nan],
  2333. [5.0, 4.0, 4.0, 3.0],
  2334. [np.nan, 6.0, np.nan, 5.0],
  2335. ]
  2336. ),
  2337. index=MultiIndex.from_arrays(
  2338. [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"]
  2339. ),
  2340. columns=MultiIndex.from_arrays(
  2341. [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)],
  2342. names=[None, "lev3"],
  2343. ),
  2344. )
  2345. tm.assert_frame_equal(result, expected)
  2346. assert index == ["lev1", "lev2"]
  2347. assert columns == ["lev3"]
  2348. def test_pivot_columns_not_given(self):
  2349. # GH#48293
  2350. df = DataFrame({"a": [1], "b": 1})
  2351. with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
  2352. df.pivot() # pylint: disable=missing-kwoa
  2353. # this still fails because columns=None gets passed down to unstack as level=None
  2354. # while at that point None was converted to NaN
  2355. @pytest.mark.xfail(
  2356. using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
  2357. )
  2358. def test_pivot_columns_is_none(self):
  2359. # GH#48293
  2360. df = DataFrame({None: [1], "b": 2, "c": 3})
  2361. result = df.pivot(columns=None)
  2362. expected = DataFrame({("b", 1): [2], ("c", 1): 3})
  2363. tm.assert_frame_equal(result, expected)
  2364. result = df.pivot(columns=None, index="b")
  2365. expected = DataFrame({("c", 1): 3}, index=Index([2], name="b"))
  2366. tm.assert_frame_equal(result, expected)
  2367. result = df.pivot(columns=None, index="b", values="c")
  2368. expected = DataFrame({1: 3}, index=Index([2], name="b"))
  2369. tm.assert_frame_equal(result, expected)
  2370. def test_pivot_index_is_none(self, using_infer_string):
  2371. # GH#48293
  2372. df = DataFrame({None: [1], "b": 2, "c": 3})
  2373. result = df.pivot(columns="b", index=None)
  2374. expected = DataFrame({("c", 2): 3}, index=[1])
  2375. expected.columns.names = [None, "b"]
  2376. tm.assert_frame_equal(result, expected)
  2377. result = df.pivot(columns="b", index=None, values="c")
  2378. expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
  2379. if using_infer_string:
  2380. expected.index.name = np.nan
  2381. tm.assert_frame_equal(result, expected)
  2382. def test_pivot_values_is_none(self):
  2383. # GH#48293
  2384. df = DataFrame({None: [1], "b": 2, "c": 3})
  2385. result = df.pivot(columns="b", index="c", values=None)
  2386. expected = DataFrame(
  2387. 1, index=Index([3], name="c"), columns=Index([2], name="b")
  2388. )
  2389. tm.assert_frame_equal(result, expected)
  2390. result = df.pivot(columns="b", values=None)
  2391. expected = DataFrame(1, index=[0], columns=Index([2], name="b"))
  2392. tm.assert_frame_equal(result, expected)
  2393. def test_pivot_not_changing_index_name(self):
  2394. # GH#52692
  2395. df = DataFrame({"one": ["a"], "two": 0, "three": 1})
  2396. expected = df.copy(deep=True)
  2397. df.pivot(index="one", columns="two", values="three")
  2398. tm.assert_frame_equal(df, expected)
  2399. def test_pivot_table_empty_dataframe_correct_index(self):
  2400. # GH 21932
  2401. df = DataFrame([], columns=["a", "b", "value"])
  2402. pivot = df.pivot_table(index="a", columns="b", values="value", aggfunc="count")
  2403. expected = Index([], dtype="object", name="b")
  2404. tm.assert_index_equal(pivot.columns, expected)
  2405. def test_pivot_table_handles_explicit_datetime_types(self):
  2406. # GH#43574
  2407. df = DataFrame(
  2408. [
  2409. {"a": "x", "date_str": "2023-01-01", "amount": 1},
  2410. {"a": "y", "date_str": "2023-01-02", "amount": 2},
  2411. {"a": "z", "date_str": "2023-01-03", "amount": 3},
  2412. ]
  2413. )
  2414. df["date"] = pd.to_datetime(df["date_str"])
  2415. with tm.assert_produces_warning(False):
  2416. pivot = df.pivot_table(
  2417. index=["a", "date"], values=["amount"], aggfunc="sum", margins=True
  2418. )
  2419. expected = MultiIndex.from_tuples(
  2420. [
  2421. ("x", datetime.strptime("2023-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")),
  2422. ("y", datetime.strptime("2023-01-02 00:00:00", "%Y-%m-%d %H:%M:%S")),
  2423. ("z", datetime.strptime("2023-01-03 00:00:00", "%Y-%m-%d %H:%M:%S")),
  2424. ("All", ""),
  2425. ],
  2426. names=["a", "date"],
  2427. )
  2428. tm.assert_index_equal(pivot.index, expected)
  2429. def test_pivot_table_with_margins_and_numeric_column_names(self):
  2430. # GH#26568
  2431. df = DataFrame([["a", "x", 1], ["a", "y", 2], ["b", "y", 3], ["b", "z", 4]])
  2432. result = df.pivot_table(
  2433. index=0, columns=1, values=2, aggfunc="sum", fill_value=0, margins=True
  2434. )
  2435. expected = DataFrame(
  2436. [[1, 2, 0, 3], [0, 3, 4, 7], [1, 5, 4, 10]],
  2437. columns=Index(["x", "y", "z", "All"], name=1),
  2438. index=Index(["a", "b", "All"], name=0),
  2439. )
  2440. tm.assert_frame_equal(result, expected)