test_constructors.py 122 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387
  1. import array
  2. from collections import (
  3. OrderedDict,
  4. abc,
  5. defaultdict,
  6. namedtuple,
  7. )
  8. from collections.abc import Iterator
  9. from dataclasses import make_dataclass
  10. from datetime import (
  11. date,
  12. datetime,
  13. timedelta,
  14. )
  15. import functools
  16. import re
  17. import numpy as np
  18. from numpy import ma
  19. from numpy.ma import mrecords
  20. import pytest
  21. import pytz
  22. from pandas._libs import lib
  23. from pandas.compat.numpy import np_version_gt2
  24. from pandas.errors import IntCastingNaNError
  25. import pandas.util._test_decorators as td
  26. from pandas.core.dtypes.common import is_integer_dtype
  27. from pandas.core.dtypes.dtypes import (
  28. DatetimeTZDtype,
  29. IntervalDtype,
  30. NumpyEADtype,
  31. PeriodDtype,
  32. )
  33. import pandas as pd
  34. from pandas import (
  35. Categorical,
  36. CategoricalIndex,
  37. DataFrame,
  38. DatetimeIndex,
  39. Index,
  40. Interval,
  41. MultiIndex,
  42. Period,
  43. RangeIndex,
  44. Series,
  45. Timedelta,
  46. Timestamp,
  47. cut,
  48. date_range,
  49. isna,
  50. )
  51. import pandas._testing as tm
  52. from pandas.arrays import (
  53. DatetimeArray,
  54. IntervalArray,
  55. PeriodArray,
  56. SparseArray,
  57. TimedeltaArray,
  58. )
  # Dtype names used to build the per-dtype arrays in
  # ``test_constructor_mixed_dtypes``.
  MIXED_FLOAT_DTYPES = [f"float{bits}" for bits in (16, 32, 64)]
  MIXED_INT_DTYPES = [
      f"{kind}{bits}" for kind in ("uint", "int") for bits in (8, 16, 32, 64)
  ]
  70. class TestDataFrameConstructors:
  71. def test_constructor_from_ndarray_with_str_dtype(self):
  72. # If we don't ravel/reshape around ensure_str_array, we end up
  73. # with an array of strings each of which is e.g. "[0 1 2]"
  74. arr = np.arange(12).reshape(4, 3)
  75. df = DataFrame(arr, dtype=str)
  76. expected = DataFrame(arr.astype(str), dtype="str")
  77. tm.assert_frame_equal(df, expected)
  78. def test_constructor_from_2d_datetimearray(self, using_array_manager):
  79. dti = date_range("2016-01-01", periods=6, tz="US/Pacific")
  80. dta = dti._data.reshape(3, 2)
  81. df = DataFrame(dta)
  82. expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]})
  83. tm.assert_frame_equal(df, expected)
  84. if not using_array_manager:
  85. # GH#44724 big performance hit if we de-consolidate
  86. assert len(df._mgr.blocks) == 1
  87. def test_constructor_dict_with_tzaware_scalar(self):
  88. # GH#42505
  89. dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles")
  90. dt = dt.as_unit("ns")
  91. df = DataFrame({"dt": dt}, index=[0])
  92. expected = DataFrame({"dt": [dt]})
  93. tm.assert_frame_equal(df, expected)
  94. # Non-homogeneous
  95. df = DataFrame({"dt": dt, "value": [1]})
  96. expected = DataFrame({"dt": [dt], "value": [1]})
  97. tm.assert_frame_equal(df, expected)
  def test_construct_ndarray_with_nas_and_int_dtype(self):
      """Float data containing NaN cannot be built with an integer dtype."""
      # GH#26919 match Series by not casting np.nan to meaningless int
      values = np.array([[1, np.nan], [2, 3]])
      msg = r"Cannot convert non-finite values \(NA or inf\) to integer"

      def build_frame():
          DataFrame(values, dtype="i8")

      def build_series():
          # check this matches Series behavior
          Series(values[0], dtype="i8", name=0)

      for build in (build_frame, build_series):
          with pytest.raises(IntCastingNaNError, match=msg):
              build()
  107. def test_construct_from_list_of_datetimes(self):
  108. df = DataFrame([datetime.now(), datetime.now()])
  109. assert df[0].dtype == np.dtype("M8[ns]")
  110. def test_constructor_from_tzaware_datetimeindex(self):
  111. # don't cast a DatetimeIndex WITH a tz, leave as object
  112. # GH#6032
  113. naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
  114. idx = naive.tz_localize("US/Pacific")
  115. expected = Series(np.array(idx.tolist(), dtype="object"), name="B")
  116. assert expected.dtype == idx.dtype
  117. # convert index to series
  118. result = Series(idx)
  119. tm.assert_series_equal(result, expected)
  120. def test_columns_with_leading_underscore_work_with_to_dict(self):
  121. col_underscore = "_b"
  122. df = DataFrame({"a": [1, 2], col_underscore: [3, 4]})
  123. d = df.to_dict(orient="records")
  124. ref_d = [{"a": 1, col_underscore: 3}, {"a": 2, col_underscore: 4}]
  125. assert ref_d == d
  126. def test_columns_with_leading_number_and_underscore_work_with_to_dict(self):
  127. col_with_num = "1_b"
  128. df = DataFrame({"a": [1, 2], col_with_num: [3, 4]})
  129. d = df.to_dict(orient="records")
  130. ref_d = [{"a": 1, col_with_num: 3}, {"a": 2, col_with_num: 4}]
  131. assert ref_d == d
  132. def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series):
  133. # GH#39462
  134. nat = np.datetime64("NaT", "ns")
  135. arr = np.array([nat], dtype=object)
  136. if frame_or_series is DataFrame:
  137. arr = arr.reshape(1, 1)
  138. msg = "Invalid type for timedelta scalar: <class 'numpy.datetime64'>"
  139. with pytest.raises(TypeError, match=msg):
  140. frame_or_series(arr, dtype="m8[ns]")
  @pytest.mark.parametrize("kind", ["m", "M"])
  def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series):
      """dt64/td64 data requested as object is boxed to pandas scalars.

      ``kind`` selects datetime64 ("M") or timedelta64 ("m") input; every
      construction path must store an object array whose first element is a
      ``Timestamp`` or ``Timedelta`` respectively.
      """
      # with dtype=object, we should cast dt64 values to Timestamps, not pydatetimes
      if kind == "M":
          dtype, scalar_type = "M8[ns]", Timestamp
      else:
          dtype, scalar_type = "m8[ns]", Timedelta

      def check_boxed(obj):
          first_array = obj._mgr.arrays[0]
          assert first_array.dtype == object
          assert isinstance(first_array.ravel()[0], scalar_type)

      arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2)
      if frame_or_series is Series:
          arr = arr[:, 0]

      check_boxed(frame_or_series(arr, dtype=object))

      # go through a different path in internals.construction
      check_boxed(frame_or_series(frame_or_series(arr), dtype=object))
      check_boxed(frame_or_series(frame_or_series(arr), dtype=NumpyEADtype(object)))

      if frame_or_series is DataFrame:
          # other paths through internals.construction
          sers = [Series(row) for row in arr]
          check_boxed(frame_or_series(sers, dtype=object))
  169. def test_series_with_name_not_matching_column(self):
  170. # GH#9232
  171. x = Series(range(5), name=1)
  172. y = Series(range(5), name=0)
  173. result = DataFrame(x, columns=[0])
  174. expected = DataFrame([], columns=[0])
  175. tm.assert_frame_equal(result, expected)
  176. result = DataFrame(y, columns=[1])
  177. expected = DataFrame([], columns=[1])
  178. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize(
      "constructor",
      [
          lambda: DataFrame(),
          lambda: DataFrame(None),
          lambda: DataFrame(()),
          lambda: DataFrame([]),
          lambda: DataFrame(_ for _ in []),
          lambda: DataFrame(range(0)),
          lambda: DataFrame(data=None),
          lambda: DataFrame(data=()),
          lambda: DataFrame(data=[]),
          lambda: DataFrame(data=(_ for _ in [])),
          lambda: DataFrame(data=range(0)),
      ],
  )
  def test_empty_constructor(self, constructor):
      """Each empty input yields a frame equal to ``DataFrame()``."""
      result = constructor()
      for axis in (result.index, result.columns):
          assert len(axis) == 0
      tm.assert_frame_equal(result, DataFrame())
  @pytest.mark.parametrize(
      "constructor",
      [
          lambda: DataFrame({}),
          lambda: DataFrame(data={}),
      ],
  )
  def test_empty_constructor_object_index(self, constructor):
      """An empty dict yields empty ``RangeIndex`` axes, index type checked."""
      result = constructor()
      for axis in (result.index, result.columns):
          assert len(axis) == 0
      expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0))
      tm.assert_frame_equal(result, expected, check_index_type=True)
  @pytest.mark.parametrize(
      "emptylike,expected_index,expected_columns",
      [
          ([[]], RangeIndex(1), RangeIndex(0)),
          ([[], []], RangeIndex(2), RangeIndex(0)),
          ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)),
      ],
  )
  def test_emptylike_constructor(self, emptylike, expected_index, expected_columns):
      """A list of empty rows gives one index entry per row and no columns."""
      tm.assert_frame_equal(
          DataFrame(emptylike),
          DataFrame(index=expected_index, columns=expected_columns),
      )
  226. def test_constructor_mixed(self, float_string_frame, using_infer_string):
  227. dtype = "str" if using_infer_string else np.object_
  228. assert float_string_frame["foo"].dtype == dtype
  def test_constructor_cast_failure(self):
      """An impossible cast raises; 2D arrays are rejected as a single column."""
      # as of 2.0, we raise if we can't respect "dtype", previously we
      #  silently ignored
      with pytest.raises(ValueError, match="could not convert string to float"):
          DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)

      # GH 3010, constructing with odd arrays
      ones_2d = np.ones((4, 2))
      df = DataFrame(np.ones((4, 2)))

      # this is ok
      df["foo"] = ones_2d.tolist()

      # this is not ok
      shape_msg = "Expected a 1D array, got an array with shape \\(4, 2\\)"
      with pytest.raises(ValueError, match=shape_msg):
          df["test"] = ones_2d

      # this is ok
      df["foo2"] = ones_2d.tolist()
  245. def test_constructor_dtype_copy(self):
  246. orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]})
  247. new_df = DataFrame(orig_df, dtype=float, copy=True)
  248. new_df["col1"] = 200.0
  249. assert orig_df["col1"][0] == 1.0
  def test_constructor_dtype_nocast_view_dataframe(
      self, using_copy_on_write, warn_copy_on_write
  ):
      """Re-wrapping a frame with its own dtype: write-through depends on CoW.

      Under copy-on-write the parent keeps its value after the write;
      otherwise the write is visible in the parent.
      """
      df = DataFrame([[1, 2]])
      should_be_view = DataFrame(df, dtype=df[0].dtype)

      if not using_copy_on_write:
          with tm.assert_cow_warning(warn_copy_on_write):
              should_be_view.iloc[0, 0] = 99
          assert df.values[0, 0] == 99
          return

      should_be_view.iloc[0, 0] = 99
      assert df.values[0, 0] == 1
  def test_constructor_dtype_nocast_view_2d_array(
      self, using_array_manager, using_copy_on_write, warn_copy_on_write
  ):
      """Wrapping ``df.values`` without a cast: either a view or contiguous.

      With neither the array manager nor copy-on-write, a write to the new
      frame must show up in the source. Otherwise only C-contiguity of the
      first stored array is checked.
      """
      df = DataFrame([[1, 2], [3, 4]], dtype="int64")

      if using_array_manager or using_copy_on_write:
          # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve
          # a view on the array to ensure contiguous 1D arrays
          df2 = DataFrame(df.values, dtype=df[0].dtype)
          assert df2._mgr.arrays[0].flags.c_contiguous
          return

      should_be_view = DataFrame(df.values, dtype=df[0].dtype)
      # TODO(CoW-warn) this should warn
      # with tm.assert_cow_warning(warn_copy_on_write):
      should_be_view.iloc[0, 0] = 97
      assert df.values[0, 0] == 97
  @td.skip_array_manager_invalid_test
  def test_1d_object_array_does_not_copy(self, using_infer_string):
      """A 1D object array passed with ``copy=False`` shares memory."""
      # https://github.com/pandas-dev/pandas/issues/39272
      arr = np.array(["a", "b"], dtype="object")

      inferred = DataFrame(arr, copy=False)
      if not using_infer_string:
          assert np.shares_memory(inferred.values, arr)
      elif inferred[0].dtype.storage != "pyarrow":
          assert np.shares_memory(inferred[0].to_numpy(), arr)
      # otherwise: object dtype strings are converted to arrow memory,
      # no numpy arrays to compare

      explicit = DataFrame(arr, dtype=object, copy=False)
      assert np.shares_memory(explicit.values, arr)
  @td.skip_array_manager_invalid_test
  def test_2d_object_array_does_not_copy(self, using_infer_string):
      """A 2D object array passed with ``copy=False`` shares memory."""
      # https://github.com/pandas-dev/pandas/issues/39272
      arr = np.array([["a", "b"], ["c", "d"]], dtype="object")

      inferred = DataFrame(arr, copy=False)
      if not using_infer_string:
          assert np.shares_memory(inferred.values, arr)
      elif inferred[0].dtype.storage != "pyarrow":
          assert np.shares_memory(inferred[0].to_numpy(), arr)
      # otherwise: object dtype strings are converted to arrow memory,
      # no numpy arrays to compare

      explicit = DataFrame(arr, dtype=object, copy=False)
      assert np.shares_memory(explicit.values, arr)
  309. def test_constructor_dtype_list_data(self):
  310. df = DataFrame([[1, "2"], [None, "a"]], dtype=object)
  311. assert df.loc[1, 0] is None
  312. assert df.loc[0, 1] == "2"
  def test_constructor_list_of_2d_raises(self):
      """A list of 2D objects is rejected, with the 3D shape in the message."""
      # https://github.com/pandas-dev/pandas/issues/32289
      empty_msg = r"shape=\(1, 0, 0\)"
      for nested in (DataFrame(), np.empty((0, 0))):
          with pytest.raises(ValueError, match=empty_msg):
              DataFrame([nested])

      frame = DataFrame({"A": [1, 2]})
      with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"):
          DataFrame([frame, frame])
  @pytest.mark.parametrize(
      "typ, ad",
      [
          # mixed floating and integer coexist in the same frame
          ["float", {}],
          # add lots of types
          ["float", {"A": 1, "B": "foo", "C": "bar"}],
          # GH 622
          ["int", {}],
      ],
  )
  def test_constructor_mixed_dtypes(self, typ, ad):
      """Columns built from differently typed arrays keep their own dtype.

      Parameters
      ----------
      typ : str
          ``"int"`` or ``"float"``; selects which dtype list the arrays are
          built from.
      ad : dict
          Extra columns placed ahead of the per-dtype arrays. It is read
          only: pytest passes parametrized values without copying, so the
          frame data is assembled in a new dict instead of updating ``ad``.
      """
      # NOTE(review): the "int" branch casts ``random(10)`` output and the
      # "float" branch casts ``integers(...)`` output; this looks swapped, but
      # only dtypes are asserted below, so generation is left as it was.
      if typ == "int":
          dtypes = MIXED_INT_DTYPES
          arrays = [
              np.array(np.random.default_rng(2).random(10), dtype=d) for d in dtypes
          ]
      elif typ == "float":
          dtypes = MIXED_FLOAT_DTYPES
          arrays = [
              np.array(np.random.default_rng(2).integers(10, size=10), dtype=d)
              for d in dtypes
          ]

      for d, a in zip(dtypes, arrays):
          assert a.dtype == d

      # Same key order as ``ad.update(...)``, without mutating the shared
      # parametrize argument.
      data = {**ad, **dict(zip(dtypes, arrays))}
      df = DataFrame(data)

      for d in MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES:
          if d in df:
              assert df.dtypes[d] == d
  355. def test_constructor_complex_dtypes(self):
  356. # GH10952
  357. a = np.random.default_rng(2).random(10).astype(np.complex64)
  358. b = np.random.default_rng(2).random(10).astype(np.complex128)
  359. df = DataFrame({"a": a, "b": b})
  360. assert a.dtype == df.a.dtype
  361. assert b.dtype == df.b.dtype
  362. def test_constructor_dtype_str_na_values(self, string_dtype):
  363. # https://github.com/pandas-dev/pandas/issues/21083
  364. df = DataFrame({"A": ["x", None]}, dtype=string_dtype)
  365. result = df.isna()
  366. expected = DataFrame({"A": [False, True]})
  367. tm.assert_frame_equal(result, expected)
  368. assert df.iloc[1, 0] is None
  369. df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype)
  370. assert np.isnan(df.iloc[1, 0])
  371. def test_constructor_rec(self, float_frame):
  372. rec = float_frame.to_records(index=False)
  373. rec.dtype.names = list(rec.dtype.names)[::-1]
  374. index = float_frame.index
  375. df = DataFrame(rec)
  376. tm.assert_index_equal(df.columns, Index(rec.dtype.names))
  377. df2 = DataFrame(rec, index=index)
  378. tm.assert_index_equal(df2.columns, Index(rec.dtype.names))
  379. tm.assert_index_equal(df2.index, index)
  380. # case with columns != the ones we would infer from the data
  381. rng = np.arange(len(rec))[::-1]
  382. df3 = DataFrame(rec, index=rng, columns=["C", "B"])
  383. expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"])
  384. tm.assert_frame_equal(df3, expected)
  385. def test_constructor_bool(self):
  386. df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)})
  387. assert df.values.dtype == np.bool_
  388. def test_constructor_overflow_int64(self):
  389. # see gh-14881
  390. values = np.array([2**64 - i for i in range(1, 10)], dtype=np.uint64)
  391. result = DataFrame({"a": values})
  392. assert result["a"].dtype == np.uint64
  393. # see gh-2355
  394. data_scores = [
  395. (6311132704823138710, 273),
  396. (2685045978526272070, 23),
  397. (8921811264899370420, 45),
  398. (17019687244989530680, 270),
  399. (9930107427299601010, 273),
  400. ]
  401. dtype = [("uid", "u8"), ("score", "u8")]
  402. data = np.zeros((len(data_scores),), dtype=dtype)
  403. data[:] = data_scores
  404. df_crawls = DataFrame(data)
  405. assert df_crawls["uid"].dtype == np.uint64
  @pytest.mark.parametrize(
      "values",
      [
          np.array([2**64], dtype=object),
          np.array([2**65]),
          [2**64 + 1],
          np.array([-(2**63) - 4], dtype=object),
          np.array([-(2**64) - 1]),
          [-(2**65) - 2],
      ],
  )
  def test_constructor_int_overflow(self, values):
      """These out-of-range integers are stored unchanged as object dtype."""
      # see gh-18584
      first = values[0]
      column = DataFrame(values)[0]
      assert column.dtype == object
      assert column[0] == first
  @pytest.mark.parametrize(
      "values",
      [
          np.array([1], dtype=np.uint16),
          np.array([1], dtype=np.uint32),
          np.array([1], dtype=np.uint64),
          [np.uint16(1)],
          [np.uint32(1)],
          [np.uint64(1)],
      ],
  )
  def test_constructor_numpy_uints(self, values):
      """NumPy unsigned data keeps its exact dtype and value."""
      # GH#47294
      first = values[0]
      column = DataFrame(values)[0]
      assert column.dtype == first.dtype
      assert column[0] == first
  440. def test_constructor_ordereddict(self):
  441. nitems = 100
  442. nums = list(range(nitems))
  443. np.random.default_rng(2).shuffle(nums)
  444. expected = [f"A{i:d}" for i in nums]
  445. df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems)))
  446. assert expected == list(df.columns)
  def test_constructor_dict(self):
      """Dict-of-Series construction pads shorter Series and honours ``columns``.

      Also checks that an empty frame has length zero and that mixing a dict
      value with a list value raises ``ValueError``.
      """
      full = Series(
          np.arange(30, dtype=np.float64), index=date_range("2020-01-01", periods=30)
      )
      # test expects index shifted by 5
      short = full[5:]
      assert len(full) == 30
      assert len(short) == 25

      frame = DataFrame({"col1": full, "col2": short})
      tm.assert_series_equal(frame["col1"], full.rename("col1"))

      # col2 is padded with NaN
      padded = np.concatenate([[np.nan] * 5, short.values])
      exp = Series(padded, index=full.index, name="col2")
      tm.assert_series_equal(exp, frame["col2"])

      frame = DataFrame(
          {"col1": full, "col2": short},
          columns=["col2", "col3", "col4"],
      )
      assert len(frame) == len(short)
      assert "col1" not in frame
      assert isna(frame["col3"]).all()

      # Corner cases
      assert len(DataFrame()) == 0

      # mix dict and array, wrong size - no spec for which error should raise
      # first
      msg = "Mixing dicts with non-Series may lead to ambiguous ordering."
      with pytest.raises(ValueError, match=msg):
          DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]})
  478. def test_constructor_dict_length1(self):
  479. # Length-one dict micro-optimization
  480. frame = DataFrame({"A": {"1": 1, "2": 2}})
  481. tm.assert_index_equal(frame.index, Index(["1", "2"]))
  482. def test_constructor_dict_with_index(self):
  483. # empty dict plus index
  484. idx = Index([0, 1, 2])
  485. frame = DataFrame({}, index=idx)
  486. assert frame.index is idx
  487. def test_constructor_dict_with_index_and_columns(self):
  488. # empty dict with index and columns
  489. idx = Index([0, 1, 2])
  490. frame = DataFrame({}, index=idx, columns=idx)
  491. assert frame.index is idx
  492. assert frame.columns is idx
  493. assert len(frame._series) == 3
  494. def test_constructor_dict_of_empty_lists(self):
  495. # with dict of empty list and Series
  496. frame = DataFrame({"A": [], "B": []}, columns=["A", "B"])
  497. tm.assert_index_equal(frame.index, RangeIndex(0), exact=True)
  498. def test_constructor_dict_with_none(self):
  499. # GH 14381
  500. # Dict with None value
  501. frame_none = DataFrame({"a": None}, index=[0])
  502. frame_none_list = DataFrame({"a": [None]}, index=[0])
  503. assert frame_none._get_value(0, "a") is None
  504. assert frame_none_list._get_value(0, "a") is None
  505. tm.assert_frame_equal(frame_none, frame_none_list)
  def test_constructor_dict_errors(self):
      """An all-scalar dict without an index raises, with or without ``columns``."""
      # GH10856
      msg = "If using all scalar values, you must pass an index"
      for extra in ({}, {"columns": ["a"]}):
          with pytest.raises(ValueError, match=msg):
              DataFrame({"a": 0.7}, **extra)
  @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"])
  def test_constructor_invalid_items_unused(self, scalar):
      """A scalar dict value is harmless when ``columns`` does not select its key."""
      unused = {"a": scalar}
      result = DataFrame(unused, columns=["b"])
      tm.assert_frame_equal(result, DataFrame(columns=["b"]))
  @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")])
  def test_constructor_dict_nan_key(self, value):
      """Build frames whose column keys and row labels include ``value``."""
      # GH 18455
      cols = [1, value, 3]
      idx = ["a", value]
      column_values = [[0, 3], [1, 4], [2, 5]]
      data = {
          label: Series(column, index=idx)
          for label, column in zip(cols, column_values)
      }
      expected = DataFrame(
          np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols
      )

      # no axes passed: sort rows and columns before comparing
      inferred = DataFrame(data).sort_values(1).sort_values("a", axis=1)
      tm.assert_frame_equal(inferred, expected)

      # index passed: only the columns are sorted
      with_index = DataFrame(data, index=idx).sort_values("a", axis=1)
      tm.assert_frame_equal(with_index, expected)

      # both axes passed: compared as constructed
      with_both = DataFrame(data, index=idx, columns=cols)
      tm.assert_frame_equal(with_both, expected)
  @pytest.mark.parametrize("value", [np.nan, None, float("nan")])
  def test_constructor_dict_nan_tuple_key(self, value):
      """Build frames whose tuple keys and tuple row labels contain ``value``."""
      # GH 18455
      cols = Index([(11, 21), (value, 22), (13, value)])
      idx = Index([("a", value), (value, 2)])
      column_values = [[0, 3], [1, 4], [2, 5]]
      data = {
          cols[pos]: Series(column_values[pos], index=idx)
          for pos in range(len(column_values))
      }
      expected = DataFrame(
          np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols
      )
      row_key = ("a", value)

      # no axes passed: sort rows and columns before comparing
      inferred = DataFrame(data).sort_values((11, 21)).sort_values(row_key, axis=1)
      tm.assert_frame_equal(inferred, expected)

      # index passed: only the columns are sorted
      with_index = DataFrame(data, index=idx).sort_values(row_key, axis=1)
      tm.assert_frame_equal(with_index, expected)

      # both axes passed: compared as constructed
      with_both = DataFrame(data, index=idx, columns=cols)
      tm.assert_frame_equal(with_both, expected)
  552. def test_constructor_dict_order_insertion(self):
  553. datetime_series = Series(
  554. np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
  555. )
  556. datetime_series_short = datetime_series[:5]
  557. # GH19018
  558. # initialization ordering: by insertion order if python>= 3.6
  559. d = {"b": datetime_series_short, "a": datetime_series}
  560. frame = DataFrame(data=d)
  561. expected = DataFrame(data=d, columns=list("ba"))
  562. tm.assert_frame_equal(frame, expected)
  563. def test_constructor_dict_nan_key_and_columns(self):
  564. # GH 16894
  565. result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2])
  566. expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2])
  567. tm.assert_frame_equal(result, expected)
  568. def test_constructor_multi_index(self):
  569. # GH 4078
  570. # construction error with mi and all-nan frame
  571. tuples = [(2, 3), (3, 3), (3, 3)]
  572. mi = MultiIndex.from_tuples(tuples)
  573. df = DataFrame(index=mi, columns=mi)
  574. assert isna(df).values.ravel().all()
  575. tuples = [(3, 3), (2, 3), (3, 3)]
  576. mi = MultiIndex.from_tuples(tuples)
  577. df = DataFrame(index=mi, columns=mi)
  578. assert isna(df).values.ravel().all()
  579. def test_constructor_2d_index(self):
  580. # GH 25416
  581. # handling of 2d index in construction
  582. df = DataFrame([[1]], columns=[[1]], index=[1, 2])
  583. expected = DataFrame(
  584. [1, 1],
  585. index=Index([1, 2], dtype="int64"),
  586. columns=MultiIndex(levels=[[1]], codes=[[0]]),
  587. )
  588. tm.assert_frame_equal(df, expected)
  589. df = DataFrame([[1]], columns=[[1]], index=[[1, 2]])
  590. expected = DataFrame(
  591. [1, 1],
  592. index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]),
  593. columns=MultiIndex(levels=[[1]], codes=[[0]]),
  594. )
  595. tm.assert_frame_equal(df, expected)
  def test_constructor_error_msgs(self):
      """Each invalid input raises ValueError with the expected message."""

      def expect_value_error(pattern, *args, **kwargs):
          # Construct a frame and require a ValueError matching ``pattern``.
          with pytest.raises(ValueError, match=pattern):
              DataFrame(*args, **kwargs)

      # passing an empty array with an index specified
      expect_value_error(
          "Empty data passed with indices specified.", np.empty(0), index=[1]
      )

      # mix dict and array, wrong size
      expect_value_error(
          "Mixing dicts with non-Series may lead to ambiguous ordering.",
          {"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]},
      )

      # wrong size ndarray, GH 3105
      expect_value_error(
          r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)",
          np.arange(12).reshape((4, 3)),
          columns=["foo", "bar", "baz"],
          index=date_range("2000-01-01", periods=3),
      )

      row = np.array([[4, 5, 6]])
      expect_value_error(
          r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)",
          index=[0],
          columns=range(4),
          data=row,
      )

      flat = np.array([4, 5, 6])
      expect_value_error(
          r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)",
          index=[0],
          columns=range(4),
          data=flat,
      )

      # higher dim raise exception
      expect_value_error(
          "Must pass 2-d input",
          np.zeros((3, 3, 3)),
          columns=["A", "B", "C"],
          index=[1],
      )

      # wrong size axis labels
      expect_value_error(
          r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)",
          np.random.default_rng(2).random((2, 3)),
          columns=["A", "B", "C"],
          index=[1],
      )
      expect_value_error(
          r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)",
          np.random.default_rng(2).random((2, 3)),
          columns=["A", "B"],
          index=[1, 2],
      )

      # gh-26429
      expect_value_error(
          "2 columns passed, passed data had 10 columns",
          (range(10), range(10, 20)),
          columns=("ones", "twos"),
      )

      expect_value_error(
          "If using all scalar values, you must pass an index",
          {"a": False, "b": True},
      )
  646. def test_constructor_subclass_dict(self, dict_subclass):
  647. # Test for passing dict subclass to constructor
  648. data = {
  649. "col1": dict_subclass((x, 10.0 * x) for x in range(10)),
  650. "col2": dict_subclass((x, 20.0 * x) for x in range(10)),
  651. }
  652. df = DataFrame(data)
  653. refdf = DataFrame({col: dict(val.items()) for col, val in data.items()})
  654. tm.assert_frame_equal(refdf, df)
  655. data = dict_subclass(data.items())
  656. df = DataFrame(data)
  657. tm.assert_frame_equal(refdf, df)
  658. def test_constructor_defaultdict(self, float_frame):
  659. # try with defaultdict
  660. data = {}
  661. float_frame.loc[: float_frame.index[10], "B"] = np.nan
  662. for k, v in float_frame.items():
  663. dct = defaultdict(dict)
  664. dct.update(v.to_dict())
  665. data[k] = dct
  666. frame = DataFrame(data)
  667. expected = frame.reindex(index=float_frame.index)
  668. tm.assert_frame_equal(float_frame, expected)
  669. def test_constructor_dict_block(self):
  670. expected = np.array([[4.0, 3.0, 2.0, 1.0]])
  671. df = DataFrame(
  672. {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]},
  673. columns=["d", "c", "b", "a"],
  674. )
  675. tm.assert_numpy_array_equal(df.values, expected)
  676. def test_constructor_dict_cast(self, using_infer_string):
  677. # cast float tests
  678. test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
  679. frame = DataFrame(test_data, dtype=float)
  680. assert len(frame) == 3
  681. assert frame["B"].dtype == np.float64
  682. assert frame["A"].dtype == np.float64
  683. frame = DataFrame(test_data)
  684. assert len(frame) == 3
  685. assert frame["B"].dtype == np.object_ if not using_infer_string else "str"
  686. assert frame["A"].dtype == np.float64
  def test_constructor_dict_cast2(self):
      """``dtype=float`` raises when a column holds non-numeric strings."""
      # can't cast to float
      noise = np.random.default_rng(2).standard_normal(15)
      test_data = {
          "A": {i: f"word_{i}" for i in range(20)},
          "B": dict(enumerate(noise)),
      }
      with pytest.raises(ValueError, match="could not convert string"):
          DataFrame(test_data, dtype=float)
  695. def test_constructor_dict_dont_upcast(self):
  696. d = {"Col1": {"Row1": "A String", "Row2": np.nan}}
  697. df = DataFrame(d)
  698. assert isinstance(df["Col1"]["Row2"], float)
  699. def test_constructor_dict_dont_upcast2(self):
  700. dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2])
  701. assert isinstance(dm[1][1], int)
  702. def test_constructor_dict_of_tuples(self):
  703. # GH #1491
  704. data = {"a": (1, 2, 3), "b": (4, 5, 6)}
  705. result = DataFrame(data)
  706. expected = DataFrame({k: list(v) for k, v in data.items()})
  707. tm.assert_frame_equal(result, expected, check_dtype=False)
  708. def test_constructor_dict_of_ranges(self):
  709. # GH 26356
  710. data = {"a": range(3), "b": range(3, 6)}
  711. result = DataFrame(data)
  712. expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]})
  713. tm.assert_frame_equal(result, expected)
  714. def test_constructor_dict_of_iterators(self):
  715. # GH 26349
  716. data = {"a": iter(range(3)), "b": reversed(range(3))}
  717. result = DataFrame(data)
  718. expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]})
  719. tm.assert_frame_equal(result, expected)
  720. def test_constructor_dict_of_generators(self):
  721. # GH 26349
  722. data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))}
  723. result = DataFrame(data)
  724. expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]})
  725. tm.assert_frame_equal(result, expected)
  726. def test_constructor_dict_multiindex(self):
  727. d = {
  728. ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2},
  729. ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4},
  730. ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9},
  731. }
  732. _d = sorted(d.items())
  733. df = DataFrame(d)
  734. expected = DataFrame(
  735. [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
  736. ).T
  737. expected.index = MultiIndex.from_tuples(expected.index)
  738. tm.assert_frame_equal(
  739. df,
  740. expected,
  741. )
  742. d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111}
  743. _d.insert(0, ("z", d["z"]))
  744. expected = DataFrame(
  745. [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
  746. ).T
  747. expected.index = Index(expected.index, tupleize_cols=False)
  748. df = DataFrame(d)
  749. df = df.reindex(columns=expected.columns, index=expected.index)
  750. tm.assert_frame_equal(df, expected)
  751. def test_constructor_dict_datetime64_index(self):
  752. # GH 10160
  753. dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"]
  754. def create_data(constructor):
  755. return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)}
  756. data_datetime64 = create_data(np.datetime64)
  757. data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d"))
  758. data_Timestamp = create_data(Timestamp)
  759. expected = DataFrame(
  760. [
  761. {0: 0, 1: None, 2: None, 3: None},
  762. {0: None, 1: 2, 2: None, 3: None},
  763. {0: None, 1: None, 2: 4, 3: None},
  764. {0: None, 1: None, 2: None, 3: 6},
  765. ],
  766. index=[Timestamp(dt) for dt in dates_as_str],
  767. )
  768. result_datetime64 = DataFrame(data_datetime64)
  769. result_datetime = DataFrame(data_datetime)
  770. result_Timestamp = DataFrame(data_Timestamp)
  771. tm.assert_frame_equal(result_datetime64, expected)
  772. tm.assert_frame_equal(result_datetime, expected)
  773. tm.assert_frame_equal(result_Timestamp, expected)
  @pytest.mark.parametrize(
      "klass,name",
      [
          (lambda x: np.timedelta64(x, "D"), "timedelta64"),
          (lambda x: timedelta(days=x), "pytimedelta"),
          (lambda x: Timedelta(x, "D"), "Timedelta[ns]"),
          (lambda x: Timedelta(x, "D").as_unit("s"), "Timedelta[s]"),
      ],
  )
  def test_constructor_dict_timedelta64_index(self, klass, name):
      """Timedelta-like inner keys made by ``klass`` give a Timedelta-labelled frame."""
      # GH 10160
      td_as_int = [1, 2, 3, 4]
      # column i has a single entry, 2 * i, keyed by the i-th timedelta
      data = {}
      for i, days in enumerate(td_as_int):
          data[i] = {klass(days): 2 * i}

      expected = DataFrame(
          [
              {0: 0, 1: None, 2: None, 3: None},
              {0: None, 1: 2, 2: None, 3: None},
              {0: None, 1: None, 2: 4, 3: None},
              {0: None, 1: None, 2: None, 3: 6},
          ],
          index=[Timedelta(td, "D") for td in td_as_int],
      )
      tm.assert_frame_equal(DataFrame(data), expected)
  798. def test_constructor_period_dict(self):
  799. # PeriodIndex
  800. a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M")
  801. b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D")
  802. df = DataFrame({"a": a, "b": b})
  803. assert df["a"].dtype == a.dtype
  804. assert df["b"].dtype == b.dtype
  805. # list of periods
  806. df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()})
  807. assert df["a"].dtype == a.dtype
  808. assert df["b"].dtype == b.dtype
  809. def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype):
  810. ea_scalar, ea_dtype = ea_scalar_and_dtype
  811. df = DataFrame({"a": ea_scalar}, index=[0])
  812. assert df["a"].dtype == ea_dtype
  813. expected = DataFrame(index=[0], columns=["a"], data=ea_scalar)
  814. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "data,dtype",
      [
          (Period("2020-01"), PeriodDtype("M")),
          (Interval(left=0, right=5), IntervalDtype("int64", "right")),
          (
              Timestamp("2011-01-01", tz="US/Eastern"),
              DatetimeTZDtype(unit="s", tz="US/Eastern"),
          ),
      ],
  )
  def test_constructor_extension_scalar_data(self, data, dtype):
      """A scalar ``data`` is broadcast over the frame with the given dtype."""
      # GH 34832
      result = DataFrame(index=[0, 1], columns=["a", "b"], data=data)
      for column in ("a", "b"):
          assert result[column].dtype == dtype

      repeated = pd.array([data] * 2, dtype=dtype)
      expected = DataFrame({"a": repeated, "b": repeated})
      tm.assert_frame_equal(result, expected)
  834. def test_nested_dict_frame_constructor(self):
  835. rng = pd.period_range("1/1/2000", periods=5)
  836. df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng)
  837. data = {}
  838. for col in df.columns:
  839. for row in df.index:
  840. data.setdefault(col, {})[row] = df._get_value(row, col)
  841. result = DataFrame(data, columns=rng)
  842. tm.assert_frame_equal(result, df)
  843. data = {}
  844. for col in df.columns:
  845. for row in df.index:
  846. data.setdefault(row, {})[col] = df._get_value(row, col)
  847. result = DataFrame(data, index=rng).T
  848. tm.assert_frame_equal(result, df)
  def _check_basic_constructor(self, empty):
      """Shared constructor checks for arrays produced by the factory ``empty``.

      ``empty`` is called as ``empty(shape, dtype=float)`` or ``empty(shape)``.
      Unless it is ``np.ones``, the checks stop after the failed int64 cast.
      """
      abc_cols = ["A", "B", "C"]
      # 2-D array of shape (2, 3)
      mat = empty((2, 3), dtype=float)

      # 2-D input
      frame = DataFrame(mat, columns=abc_cols, index=[1, 2])
      assert len(frame.index) == 2
      assert len(frame.columns) == 3

      # 1-D input
      frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3])
      assert len(frame.index) == 3
      assert len(frame.columns) == 1

      if empty is np.ones:
          frame = DataFrame(mat, columns=abc_cols, index=[1, 2], dtype=np.int64)
          assert frame.values.dtype == np.int64
      else:
          msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
          with pytest.raises(IntCastingNaNError, match=msg):
              DataFrame(mat, columns=abc_cols, index=[1, 2], dtype=np.int64)
          return

      # wrong size axis labels
      shape_errors = (
          (r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)", abc_cols, [1]),
          (
              r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)",
              ["A", "B"],
              [1, 2],
          ),
      )
      for msg, columns, index in shape_errors:
          with pytest.raises(ValueError, match=msg):
              DataFrame(mat, columns=columns, index=index)

      # higher dim raise exception
      with pytest.raises(ValueError, match="Must pass 2-d input"):
          DataFrame(empty((3, 3, 3)), columns=abc_cols, index=[1])

      # automatic labeling
      default_rows = Index(range(2))
      default_cols = Index(range(3))

      frame = DataFrame(mat)
      tm.assert_index_equal(frame.index, default_rows, exact=True)
      tm.assert_index_equal(frame.columns, default_cols, exact=True)

      frame = DataFrame(mat, index=[1, 2])
      tm.assert_index_equal(frame.columns, default_cols, exact=True)

      frame = DataFrame(mat, columns=abc_cols)
      tm.assert_index_equal(frame.index, default_rows, exact=True)

      # 0-length axis
      frame = DataFrame(empty((0, 3)))
      assert len(frame.index) == 0

      frame = DataFrame(empty((3, 0)))
      assert len(frame.columns) == 0
  def test_constructor_ndarray(self):
      """Run the shared checks with ``np.ones``, then build from a list of strings."""
      self._check_basic_constructor(np.ones)

      strings = ["foo", "bar"]
      frame = DataFrame(strings, index=[0, 1], columns=["A"])
      assert len(frame) == 2
  def test_constructor_maskedarray(self):
      """Run the shared checks with ``ma.masked_all``, then inspect float masks."""
      self._check_basic_constructor(ma.masked_all)

      # Check non-masked values
      partly = ma.masked_all((2, 3), dtype=float)
      partly[0, 0] = 1.0
      partly[1, 2] = 2.0
      frame = DataFrame(partly, columns=["A", "B", "C"], index=[1, 2])
      assert 1.0 == frame["A"][1]
      assert 2.0 == frame["C"][2]

      # fully masked input: no cell of the frame compares equal to itself
      all_masked = ma.masked_all((2, 3), dtype=float)
      frame = DataFrame(all_masked, columns=["A", "B", "C"], index=[1, 2])
      assert np.all(~np.asarray(frame == frame))
  @pytest.mark.filterwarnings(
      "ignore:elementwise comparison failed:DeprecationWarning"
  )
  def test_constructor_maskedarray_nonfloat(self):
      """Fully and partly masked int, datetime64 and bool arrays as frame input."""

      def build(values, **extra):
          # every frame in this test uses the same 2x3 labelling
          return DataFrame(values, columns=["A", "B", "C"], index=[1, 2], **extra)

      def with_corners(base, top_left, bottom_right):
          # copy ``base`` and unmask two cells by assigning to them
          filled = ma.copy(base)
          filled[0, 0] = top_left
          filled[1, 2] = bottom_right
          return filled

      def check_shape(frame):
          assert len(frame.index) == 2
          assert len(frame.columns) == 3

      # masked int promoted to float
      masked_int = ma.masked_all((2, 3), dtype=int)
      frame = build(masked_int)
      check_shape(frame)
      assert np.all(~np.asarray(frame == frame))

      # cast type
      frame = build(masked_int, dtype=np.float64)
      assert frame.values.dtype == np.float64

      # Check non-masked values
      frame = build(with_corners(masked_int, 1, 2))
      assert 1 == frame["A"][1]
      assert 2 == frame["C"][2]

      # masked np.datetime64 stays (use NaT as null)
      masked_dt = ma.masked_all((2, 3), dtype="M8[ns]")
      frame = build(masked_dt)
      check_shape(frame)
      assert isna(frame).values.all()

      # cast type
      msg = r"datetime64\[ns\] values and dtype=int64 is not supported"
      with pytest.raises(TypeError, match=msg):
          build(masked_dt, dtype=np.int64)

      # Check non-masked values
      frame = build(with_corners(masked_dt, 1, 2))
      assert 1 == frame["A"].astype("i8")[1]
      assert 2 == frame["C"].astype("i8")[2]

      # masked bool promoted to object
      masked_bool = ma.masked_all((2, 3), dtype=bool)
      frame = build(masked_bool)
      check_shape(frame)
      assert np.all(~np.asarray(frame == frame))

      # cast type
      frame = build(masked_bool, dtype=object)
      assert frame.values.dtype == object

      # Check non-masked values
      frame = build(with_corners(masked_bool, True, False))
      assert frame["A"][1] is True
      assert frame["C"][2] is False
  967. def test_constructor_maskedarray_hardened(self):
  968. # Check numpy masked arrays with hard masks -- from GH24574
  969. mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask()
  970. result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
  971. expected = DataFrame(
  972. {"A": [np.nan, np.nan], "B": [np.nan, np.nan]},
  973. columns=["A", "B"],
  974. index=[1, 2],
  975. dtype=float,
  976. )
  977. tm.assert_frame_equal(result, expected)
  978. # Check case where mask is hard but no data are masked
  979. mat_hard = ma.ones((2, 2), dtype=float).harden_mask()
  980. result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
  981. expected = DataFrame(
  982. {"A": [1.0, 1.0], "B": [1.0, 1.0]},
  983. columns=["A", "B"],
  984. index=[1, 2],
  985. dtype=float,
  986. )
  987. tm.assert_frame_equal(result, expected)
  def test_constructor_maskedrecarray_dtype(self):
      """Passing a masked record array raises TypeError."""
      record_dtype = [("date", "<f8"), ("price", "<f8")]
      data = np.ma.array(np.ma.zeros(5, dtype=record_dtype), mask=[False] * 5)
      records = data.view(mrecords.mrecarray)

      # Support for MaskedRecords deprecated GH#40363
      with pytest.raises(TypeError, match=r"Pass \{name: data\[name\]"):
          DataFrame(records, dtype=int)
  997. def test_constructor_corner_shape(self):
  998. df = DataFrame(index=[])
  999. assert df.values.shape == (0, 0)
  @pytest.mark.parametrize(
      "data, index, columns, dtype, expected",
      [
          (None, list(range(10)), ["a", "b"], object, np.object_),
          (None, None, ["a", "b"], "int64", np.dtype("int64")),
          (None, list(range(10)), ["a", "b"], int, np.dtype("float64")),
          ({}, None, ["foo", "bar"], None, np.object_),
          ({"b": 1}, list(range(10)), list("abc"), int, np.dtype("float64")),
      ],
  )
  def test_constructor_dtype(self, data, index, columns, dtype, expected):
      """``.values`` has dtype ``expected`` for each positional-argument combination."""
      frame = DataFrame(data, index, columns, dtype)
      values_dtype = frame.values.dtype
      assert values_dtype == expected
  @pytest.mark.parametrize(
      "data,input_dtype,expected_dtype",
      (
          ([True, False, None], "boolean", pd.BooleanDtype),
          ([1.0, 2.0, None], "Float64", pd.Float64Dtype),
          ([1, 2, None], "Int64", pd.Int64Dtype),
          (["a", "b", "c"], "string", pd.StringDtype),
      ),
  )
  def test_constructor_dtype_nullable_extension_arrays(
      self, data, input_dtype, expected_dtype
  ):
      """A string dtype alias resolves to an instance of ``expected_dtype``."""
      frame = DataFrame({"a": data}, dtype=input_dtype)
      column_dtype = frame["a"].dtype
      assert column_dtype == expected_dtype()
  1027. def test_constructor_scalar_inference(self, using_infer_string):
  1028. data = {"int": 1, "bool": True, "float": 3.0, "complex": 4j, "object": "foo"}
  1029. df = DataFrame(data, index=np.arange(10))
  1030. assert df["int"].dtype == np.int64
  1031. assert df["bool"].dtype == np.bool_
  1032. assert df["float"].dtype == np.float64
  1033. assert df["complex"].dtype == np.complex128
  1034. assert df["object"].dtype == np.object_ if not using_infer_string else "str"
  def test_constructor_arrays_and_scalars(self):
      """A scalar next to an array is broadcast; scalars alone need an index."""
      noise = np.random.default_rng(2).standard_normal(10)
      result = DataFrame({"a": noise, "b": True})
      expected = DataFrame({"a": result["a"].values, "b": [True] * 10})
      tm.assert_frame_equal(result, expected)

      with pytest.raises(ValueError, match="must pass an index"):
          DataFrame({"a": False, "b": True})
  1041. def test_constructor_DataFrame(self, float_frame):
  1042. df = DataFrame(float_frame)
  1043. tm.assert_frame_equal(df, float_frame)
  1044. df_casted = DataFrame(float_frame, dtype=np.int64)
  1045. assert df_casted.values.dtype == np.int64
  1046. def test_constructor_empty_dataframe(self):
  1047. # GH 20624
  1048. actual = DataFrame(DataFrame(), dtype="object")
  1049. expected = DataFrame([], dtype="object")
  1050. tm.assert_frame_equal(actual, expected)
  def test_constructor_more(self, float_frame):
      """Assorted checks: ndim of ``.values``, data-less frames, failed casts.

      Parameters
      ----------
      float_frame : DataFrame
          Fixture frame; it is rebuilt from its ``_series`` mapping and compared
          with the original.
      """
      # used to be in test_matrix.py
      arr = np.random.default_rng(2).standard_normal(10)
      dm = DataFrame(arr, columns=["A"], index=np.arange(10))
      assert dm.values.ndim == 2

      # zero-length 1-D input still yields 2-D values
      arr = np.random.default_rng(2).standard_normal(0)
      dm = DataFrame(arr)
      assert dm.values.ndim == 2

      # no data specified
      dm = DataFrame(columns=["A", "B"], index=np.arange(10))
      assert dm.values.shape == (10, 2)

      dm = DataFrame(columns=["A", "B"])
      assert dm.values.shape == (0, 2)

      dm = DataFrame(index=np.arange(10))
      assert dm.values.shape == (10, 0)

      # can't cast
      mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1)
      msg = "could not convert string to float: 'foo'"
      with pytest.raises(ValueError, match=msg):
          DataFrame(mat, index=[0, 1], columns=[0], dtype=float)

      dm = DataFrame(DataFrame(float_frame._series))
      tm.assert_frame_equal(dm, float_frame)

      # one int and one float column: ``.values`` is float64
      dm = DataFrame(
          {"A": np.ones(10, dtype=int), "B": np.ones(10, dtype=np.float64)},
          index=np.arange(10),
      )

      assert len(dm.columns) == 2
      assert dm.values.dtype == np.float64
  1081. def test_constructor_empty_list(self):
  1082. df = DataFrame([], index=[])
  1083. expected = DataFrame(index=[])
  1084. tm.assert_frame_equal(df, expected)
  1085. # GH 9939
  1086. df = DataFrame([], columns=["A", "B"])
  1087. expected = DataFrame({}, columns=["A", "B"])
  1088. tm.assert_frame_equal(df, expected)
  1089. # Empty generator: list(empty_gen()) == []
  1090. def empty_gen():
  1091. yield from ()
  1092. df = DataFrame(empty_gen(), columns=["A", "B"])
  1093. tm.assert_frame_equal(df, expected)
  1094. def test_constructor_list_of_lists(self, using_infer_string):
  1095. # GH #484
  1096. df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
  1097. assert is_integer_dtype(df["num"])
  1098. assert df["str"].dtype == np.object_ if not using_infer_string else "str"
  1099. # GH 4851
  1100. # list of 0-dim ndarrays
  1101. expected = DataFrame({0: np.arange(10)})
  1102. data = [np.array(x) for x in range(10)]
  1103. result = DataFrame(data)
  1104. tm.assert_frame_equal(result, expected)
  1105. def test_nested_pandasarray_matches_nested_ndarray(self):
  1106. # GH#43986
  1107. ser = Series([1, 2])
  1108. arr = np.array([None, None], dtype=object)
  1109. arr[0] = ser
  1110. arr[1] = ser * 2
  1111. df = DataFrame(arr)
  1112. expected = DataFrame(pd.array(arr))
  1113. tm.assert_frame_equal(df, expected)
  1114. assert df.shape == (2, 1)
  1115. tm.assert_numpy_array_equal(df[0].values, arr)
  1116. def test_constructor_list_like_data_nested_list_column(self):
  1117. # GH 32173
  1118. arrays = [list("abcd"), list("cdef")]
  1119. result = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
  1120. mi = MultiIndex.from_arrays(arrays)
  1121. expected = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi)
  1122. tm.assert_frame_equal(result, expected)
  def test_constructor_wrong_length_nested_list_column(self):
      """Three nested column labels for four-wide rows raise ValueError."""
      # GH 32173
      arrays = [list("abc"), list("cde")]
      rows = [[1, 2, 3, 4], [4, 5, 6, 7]]

      with pytest.raises(ValueError, match="3 columns passed, passed data had 4"):
          DataFrame(rows, columns=arrays)
  def test_constructor_unequal_length_nested_list_column(self):
      """Nested column lists of unequal length raise ValueError."""
      # GH 32173
      arrays = [list("abcd"), list("cde")]
      rows = [[1, 2, 3, 4], [4, 5, 6, 7]]

      # exception raised inside MultiIndex constructor
      with pytest.raises(ValueError, match="all arrays must be same length"):
          DataFrame(rows, columns=arrays)
  @pytest.mark.parametrize(
      "data",
      [
          [[Timestamp("2021-01-01")]],
          [{"x": Timestamp("2021-01-01")}],
          {"x": [Timestamp("2021-01-01")]},
          {"x": Timestamp("2021-01-01").as_unit("ns")},
      ],
  )
  def test_constructor_one_element_data_list(self, data):
      """A single Timestamp in ``data`` is broadcast over a three-row index."""
      # GH#42810
      stamp = Timestamp("2021-01-01")
      expected = DataFrame({"x": [stamp] * 3})
      result = DataFrame(data, index=[0, 1, 2], columns=["x"])
      tm.assert_frame_equal(result, expected)
  1150. def test_constructor_sequence_like(self):
  1151. # GH 3783
  1152. # collections.Sequence like
  1153. class DummyContainer(abc.Sequence):
  1154. def __init__(self, lst) -> None:
  1155. self._lst = lst
  1156. def __getitem__(self, n):
  1157. return self._lst.__getitem__(n)
  1158. def __len__(self) -> int:
  1159. return self._lst.__len__()
  1160. lst_containers = [DummyContainer([1, "a"]), DummyContainer([2, "b"])]
  1161. columns = ["num", "str"]
  1162. result = DataFrame(lst_containers, columns=columns)
  1163. expected = DataFrame([[1, "a"], [2, "b"]], columns=columns)
  1164. tm.assert_frame_equal(result, expected, check_dtype=False)
  1165. def test_constructor_stdlib_array(self):
  1166. # GH 4297
  1167. # support Array
  1168. result = DataFrame({"A": array.array("i", range(10))})
  1169. expected = DataFrame({"A": list(range(10))})
  1170. tm.assert_frame_equal(result, expected, check_dtype=False)
  1171. expected = DataFrame([list(range(10)), list(range(10))])
  1172. result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
  1173. tm.assert_frame_equal(result, expected, check_dtype=False)
  1174. def test_constructor_range(self):
  1175. # GH26342
  1176. result = DataFrame(range(10))
  1177. expected = DataFrame(list(range(10)))
  1178. tm.assert_frame_equal(result, expected)
  1179. def test_constructor_list_of_ranges(self):
  1180. result = DataFrame([range(10), range(10)])
  1181. expected = DataFrame([list(range(10)), list(range(10))])
  1182. tm.assert_frame_equal(result, expected)
  1183. def test_constructor_iterable(self):
  1184. # GH 21987
  1185. class Iter:
  1186. def __iter__(self) -> Iterator:
  1187. for i in range(10):
  1188. yield [1, 2, 3]
  1189. expected = DataFrame([[1, 2, 3]] * 10)
  1190. result = DataFrame(Iter())
  1191. tm.assert_frame_equal(result, expected)
  1192. def test_constructor_iterator(self):
  1193. result = DataFrame(iter(range(10)))
  1194. expected = DataFrame(list(range(10)))
  1195. tm.assert_frame_equal(result, expected)
  1196. def test_constructor_list_of_iterators(self):
  1197. result = DataFrame([iter(range(10)), iter(range(10))])
  1198. expected = DataFrame([list(range(10)), list(range(10))])
  1199. tm.assert_frame_equal(result, expected)
  1200. def test_constructor_generator(self):
  1201. # related #2305
  1202. gen1 = (i for i in range(10))
  1203. gen2 = (i for i in range(10))
  1204. expected = DataFrame([list(range(10)), list(range(10))])
  1205. result = DataFrame([gen1, gen2])
  1206. tm.assert_frame_equal(result, expected)
  1207. gen = ([i, "a"] for i in range(10))
  1208. result = DataFrame(gen)
  1209. expected = DataFrame({0: range(10), 1: "a"})
  1210. tm.assert_frame_equal(result, expected, check_dtype=False)
  1211. def test_constructor_list_of_dicts(self):
  1212. result = DataFrame([{}])
  1213. expected = DataFrame(index=RangeIndex(1), columns=[])
  1214. tm.assert_frame_equal(result, expected)
  1215. def test_constructor_ordered_dict_nested_preserve_order(self):
  1216. # see gh-18166
  1217. nested1 = OrderedDict([("b", 1), ("a", 2)])
  1218. nested2 = OrderedDict([("b", 2), ("a", 5)])
  1219. data = OrderedDict([("col2", nested1), ("col1", nested2)])
  1220. result = DataFrame(data)
  1221. data = {"col2": [1, 2], "col1": [2, 5]}
  1222. expected = DataFrame(data=data, index=["b", "a"])
  1223. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("dict_type", [dict, OrderedDict])
  def test_constructor_ordered_dict_preserve_order(self, dict_type):
      """Key insertion order decides column order (see gh-13304)."""
      expected = DataFrame([[2, 1]], columns=["b", "a"])

      # mapping of column label -> column values
      by_column = dict_type()
      by_column["b"] = [2]
      by_column["a"] = [1]
      tm.assert_frame_equal(DataFrame(by_column), expected)

      # a single row mapping wrapped in a list
      row = dict_type()
      row["b"] = 2
      row["a"] = 1
      tm.assert_frame_equal(DataFrame([row]), expected)
  @pytest.mark.parametrize("dict_type", [dict, OrderedDict])
  def test_constructor_ordered_dict_conflicting_orders(self, dict_type):
      """The first row mapping fixes column order, even if later rows disagree."""
      first = dict_type([("b", 2), ("a", 1)])
      # same keys as ``first`` but inserted in the opposite order
      second = dict_type([("a", 1), ("b", 2)])
      third = {"b": 2, "a": 1}

      for rows in ([first, second], [first, second, third]):
          expected = DataFrame([[2, 1]] * len(rows), columns=["b", "a"])
          tm.assert_frame_equal(DataFrame(rows), expected)
  1255. def test_constructor_list_of_series_aligned_index(self):
  1256. series = [Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3)]
  1257. result = DataFrame(series)
  1258. expected = DataFrame(
  1259. {"b": [0, 1, 2], "a": [0, 1, 2], "c": [0, 1, 2]},
  1260. columns=["b", "a", "c"],
  1261. index=["0", "1", "2"],
  1262. )
  1263. tm.assert_frame_equal(result, expected)
  1264. def test_constructor_list_of_derived_dicts(self):
  1265. class CustomDict(dict):
  1266. pass
  1267. d = {"a": 1.5, "b": 3}
  1268. data_custom = [CustomDict(d)]
  1269. data = [d]
  1270. result_custom = DataFrame(data_custom)
  1271. result = DataFrame(data)
  1272. tm.assert_frame_equal(result, result_custom)
  def test_constructor_ragged(self):
      """Columns of different lengths are rejected with a ValueError."""
      ragged = {
          name: np.random.default_rng(2).standard_normal(length)
          for name, length in (("A", 10), ("B", 8))
      }
      with pytest.raises(ValueError, match="All arrays must be of the same length"):
          DataFrame(ragged)
  1280. def test_constructor_scalar(self):
  1281. idx = Index(range(3))
  1282. df = DataFrame({"a": 0}, index=idx)
  1283. expected = DataFrame({"a": [0, 0, 0]}, index=idx)
  1284. tm.assert_frame_equal(df, expected, check_dtype=False)
  def test_constructor_Series_copy_bug(self, float_frame):
      """Smoke test: a frame built from one Series column can be copied."""
      column = float_frame["A"]
      frame = DataFrame(column, index=float_frame.index, columns=["A"])
      # The result is intentionally discarded; the call must simply not raise.
      frame.copy()
  def test_constructor_mixed_dict_and_Series(self):
      """Mixing dict and Series values: index ordering rules and the error case."""
      mixed = {
          "A": {"foo": 1, "bar": 2, "baz": 3},
          "B": Series([4, 3, 2, 1], index=["bar", "qux", "baz", "foo"]),
      }
      assert DataFrame(mixed).index.is_monotonic_increasing

      # a list next to a dict leaves the ordering ambiguous, so it must raise
      with pytest.raises(ValueError, match="ambiguous ordering"):
          DataFrame({"A": ["a", "b"], "B": {"a": "a", "b": "b"}})

      # a list next to a Series is accepted
      labels = ["a", "b"]
      result = DataFrame({"A": ["a", "b"], "B": Series(["a", "b"], index=labels)})
      expected = DataFrame({"A": ["a", "b"], "B": ["a", "b"]}, index=labels)
      tm.assert_frame_equal(result, expected)
  1301. def test_constructor_mixed_type_rows(self):
  1302. # Issue 25075
  1303. data = [[1, 2], (3, 4)]
  1304. result = DataFrame(data)
  1305. expected = DataFrame([[1, 2], [3, 4]])
  1306. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize(
      "tuples,lists",
      [
          ((), []),
          ((()), []),
          (((), ()), [(), ()]),
          (((), ()), [[], []]),
          (([], []), [[], []]),
          (([1], [2]), [[1], [2]]),  # GH 32776
          (([1, 2, 3], [4, 5, 6]), [[1, 2, 3], [4, 5, 6]]),
      ],
  )
  def test_constructor_tuple(self, tuples, lists):
      """A tuple as the outer container matches the list form (GH 25691)."""
      from_tuples = DataFrame(tuples)
      from_lists = DataFrame(lists)
      tm.assert_frame_equal(from_tuples, from_lists)
  1324. def test_constructor_list_of_tuples(self):
  1325. result = DataFrame({"A": [(1, 2), (3, 4)]})
  1326. expected = DataFrame({"A": Series([(1, 2), (3, 4)])})
  1327. tm.assert_frame_equal(result, expected)
  1328. def test_constructor_list_of_namedtuples(self):
  1329. # GH11181
  1330. named_tuple = namedtuple("Pandas", list("ab"))
  1331. tuples = [named_tuple(1, 3), named_tuple(2, 4)]
  1332. expected = DataFrame({"a": [1, 2], "b": [3, 4]})
  1333. result = DataFrame(tuples)
  1334. tm.assert_frame_equal(result, expected)
  1335. # with columns
  1336. expected = DataFrame({"y": [1, 2], "z": [3, 4]})
  1337. result = DataFrame(tuples, columns=["y", "z"])
  1338. tm.assert_frame_equal(result, expected)
  1339. def test_constructor_list_of_dataclasses(self):
  1340. # GH21910
  1341. Point = make_dataclass("Point", [("x", int), ("y", int)])
  1342. data = [Point(0, 3), Point(1, 3)]
  1343. expected = DataFrame({"x": [0, 1], "y": [3, 3]})
  1344. result = DataFrame(data)
  1345. tm.assert_frame_equal(result, expected)
  1346. def test_constructor_list_of_dataclasses_with_varying_types(self):
  1347. # GH21910
  1348. # varying types
  1349. Point = make_dataclass("Point", [("x", int), ("y", int)])
  1350. HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)])
  1351. data = [Point(0, 3), HLine(1, 3, 3)]
  1352. expected = DataFrame(
  1353. {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]}
  1354. )
  1355. result = DataFrame(data)
  1356. tm.assert_frame_equal(result, expected)
  def test_constructor_list_of_dataclasses_error_thrown(self):
      """A dataclass followed by a plain dict raises TypeError (GH21910)."""
      Point = make_dataclass("Point", [("x", int), ("y", int)])
      mixed_rows = [Point(0, 0), {"x": 1, "y": 0}]

      # the message contains parentheses, so escape it before regex matching
      pattern = re.escape("asdict() should be called on dataclass instances")
      with pytest.raises(TypeError, match=pattern):
          DataFrame(mixed_rows)
  1364. def test_constructor_list_of_dict_order(self):
  1365. # GH10056
  1366. data = [
  1367. {"First": 1, "Second": 4, "Third": 7, "Fourth": 10},
  1368. {"Second": 5, "First": 2, "Fourth": 11, "Third": 8},
  1369. {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13},
  1370. ]
  1371. expected = DataFrame(
  1372. {
  1373. "First": [1, 2, 3],
  1374. "Second": [4, 5, 6],
  1375. "Third": [7, 8, 9],
  1376. "Fourth": [10, 11, 12],
  1377. "YYY": [None, None, 14],
  1378. "XXX": [None, None, 13],
  1379. }
  1380. )
  1381. result = DataFrame(data)
  1382. tm.assert_frame_equal(result, expected)
  def test_constructor_Series_named(self):
      """Column labels produced when a frame is built from named/unnamed Series."""
      named = Series([1, 2, 3], index=["a", "b", "c"], name="x")
      frame = DataFrame(named)
      assert frame.columns[0] == "x"
      tm.assert_index_equal(frame.index, named.index)

      # ndarray like
      values = np.random.default_rng(2).standard_normal(10)
      named_values = Series(values, name="x")
      tm.assert_frame_equal(DataFrame(named_values), DataFrame({"x": named_values}))

      # an unnamed Series ends up under column label 0
      shifted = Series(values, index=range(3, 13))
      tm.assert_frame_equal(DataFrame(shifted), DataFrame({0: shifted}))

      # asking for two columns from a single Series is a shape mismatch
      msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(shifted, columns=[1, 2])

      # #2234
      empty_named = Series([], name="x", dtype=object)
      assert DataFrame(empty_named).columns[0] == "x"

      # series with name and w/o
      s1 = Series(values, name="x")
      transposed = DataFrame([s1, values]).T
      expected = DataFrame(
          {"x": s1, "Unnamed 0": values}, columns=["x", "Unnamed 0"]
      )
      tm.assert_frame_equal(transposed, expected)

      # this is a bit non-intuitive here; the series collapse down to arrays
      transposed = DataFrame([values, s1]).T
      expected = DataFrame({1: s1, 0: values}, columns=[0, 1])
      tm.assert_frame_equal(transposed, expected)
  1414. def test_constructor_Series_named_and_columns(self):
  1415. # GH 9232 validation
  1416. s0 = Series(range(5), name=0)
  1417. s1 = Series(range(5), name=1)
  1418. # matching name and column gives standard frame
  1419. tm.assert_frame_equal(DataFrame(s0, columns=[0]), s0.to_frame())
  1420. tm.assert_frame_equal(DataFrame(s1, columns=[1]), s1.to_frame())
  1421. # non-matching produces empty frame
  1422. assert DataFrame(s0, columns=[1]).empty
  1423. assert DataFrame(s1, columns=[0]).empty
  1424. def test_constructor_Series_differently_indexed(self):
  1425. # name
  1426. s1 = Series([1, 2, 3], index=["a", "b", "c"], name="x")
  1427. # no name
  1428. s2 = Series([1, 2, 3], index=["a", "b", "c"])
  1429. other_index = Index(["a", "b"])
  1430. df1 = DataFrame(s1, index=other_index)
  1431. exp1 = DataFrame(s1.reindex(other_index))
  1432. assert df1.columns[0] == "x"
  1433. tm.assert_frame_equal(df1, exp1)
  1434. df2 = DataFrame(s2, index=other_index)
  1435. exp2 = DataFrame(s2.reindex(other_index))
  1436. assert df2.columns[0] == 0
  1437. tm.assert_index_equal(df2.index, other_index)
  1438. tm.assert_frame_equal(df2, exp2)
  @pytest.mark.parametrize(
      "name_in1,name_in2,name_in3,name_out",
      [
          ("idx", "idx", "idx", "idx"),
          ("idx", "idx", None, None),
          ("idx", None, None, None),
          ("idx1", "idx2", None, None),
          ("idx1", "idx1", "idx2", None),
          ("idx1", "idx2", "idx3", None),
          (None, None, None, None),
      ],
  )
  def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out):
      """Name of the combined index for each mix of input index names (GH13475)."""
      indices = [
          Index(["a", "b", "c"], name=name_in1),
          Index(["b", "c", "d"], name=name_in2),
          Index(["c", "d", "e"], name=name_in3),
      ]
      labelled = zip(["x", "y", "z"], indices)
      columns = {col: Series([0, 1, 2], index=idx) for col, idx in labelled}
      result = DataFrame(columns)

      # each column covers three of the five labels; the rest are NaN
      nan = np.nan
      expected = DataFrame(
          {
              "x": [0, 1, 2, nan, nan],
              "y": [nan, 0, 1, 2, nan],
              "z": [nan, nan, 0, 1, 2],
          },
          index=Index(["a", "b", "c", "d", "e"], name=name_out),
      )
      tm.assert_frame_equal(result, expected)
  def test_constructor_manager_resize(self, float_frame):
      """A frame built from a manager with a smaller index/columns uses those axes."""
      row_labels = list(float_frame.index[:5])
      col_labels = list(float_frame.columns[:3])

      # handing the internal manager to the constructor emits a DeprecationWarning
      msg = "Passing a BlockManager to DataFrame"
      with tm.assert_produces_warning(
          DeprecationWarning, match=msg, check_stacklevel=False
      ):
          result = DataFrame(float_frame._mgr, index=row_labels, columns=col_labels)

      tm.assert_index_equal(result.index, Index(row_labels))
      tm.assert_index_equal(result.columns, Index(col_labels))
  def test_constructor_mix_series_nonseries(self, float_frame):
      """A Series next to a plain list: equal lengths work, a shorter list raises."""
      col_a = float_frame["A"]
      col_b_values = list(float_frame["B"])

      result = DataFrame({"A": col_a, "B": col_b_values}, columns=["A", "B"])
      tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])

      # dropping two elements makes the list shorter than the Series index
      msg = "does not match index length"
      with pytest.raises(ValueError, match=msg):
          DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]})
  def test_constructor_miscast_na_int_dtype(self):
      """NaN data with an integer dtype raises IntCastingNaNError."""
      data_with_nan = [[np.nan, 1], [1, 0]]
      msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
      with pytest.raises(IntCastingNaNError, match=msg):
          DataFrame(data_with_nan, dtype=np.int64)
  1494. def test_constructor_column_duplicates(self):
  1495. # it works! #2079
  1496. df = DataFrame([[8, 5]], columns=["a", "a"])
  1497. edf = DataFrame([[8, 5]])
  1498. edf.columns = ["a", "a"]
  1499. tm.assert_frame_equal(df, edf)
  1500. idf = DataFrame.from_records([(8, 5)], columns=["a", "a"])
  1501. tm.assert_frame_equal(idf, edf)
  def test_constructor_empty_with_string_dtype(self, using_infer_string):
      """Data-less frames requested with string-like dtypes (GH 9428)."""
      axes = {"index": [0, 1], "columns": [0, 1]}
      object_frame = DataFrame(dtype=object, **axes)
      string_frame = DataFrame(dtype=pd.StringDtype(na_value=np.nan), **axes)

      # builtin ``str`` is the only spelling whose result depends on the option
      result = DataFrame(dtype=str, **axes)
      tm.assert_frame_equal(
          result, string_frame if using_infer_string else object_frame
      )

      # numpy string dtypes are compared against the object frame in both modes
      for numpy_str_dtype in (np.str_, "U5"):
          result = DataFrame(dtype=numpy_str_dtype, **axes)
          tm.assert_frame_equal(result, object_frame)
  def test_constructor_empty_with_string_extension(self, nullable_string_dtype):
      """Empty frame with a nullable string dtype can be built and compared (GH 34915)."""
      # Both frames come from the same call, so this only checks that
      # construction and comparison succeed for this dtype.
      result = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
      expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
      tm.assert_frame_equal(result, expected)
  def test_constructor_single_value(self):
      """Scalar data is broadcast over index x columns; partial axes raise."""
      # expecting single value upcasting here
      for scalar, dtype_name in ((0.0, "float64"), (0, "int64")):
          df = DataFrame(scalar, index=[1, 2, 3], columns=["a", "b", "c"])
          zeros = np.zeros(df.shape).astype(dtype_name)
          tm.assert_frame_equal(df, DataFrame(zeros, df.index, df.columns))

      # a string scalar is compared against an object-dtype block
      df = DataFrame("a", index=[1, 2], columns=["a", "c"])
      expected = DataFrame(
          np.array([["a", "a"], ["a", "a"]], dtype=object),
          index=[1, 2],
          columns=["a", "c"],
      )
      tm.assert_frame_equal(df, expected)

      # a scalar needs both axes to be given
      msg = "DataFrame constructor not properly called!"
      with pytest.raises(ValueError, match=msg):
          DataFrame("a", [1, 2])
      with pytest.raises(ValueError, match=msg):
          DataFrame("a", columns=["a", "c"])

      # a string scalar with a float dtype is rejected
      msg = "incompatible data and dtype"
      with pytest.raises(TypeError, match=msg):
          DataFrame("a", [1, 2], ["a", "c"], float)
  def test_constructor_with_datetimes(self, using_infer_string):
      """Column dtypes for scalar dict values broadcast over a ten-row index."""
      intname = np.dtype(int).name
      floatname = np.dtype(np.float64).name
      objectname = np.dtype(np.object_).name

      # dtype expected for the string columns; it depends on the fixture
      if using_infer_string:
          str_dtype = pd.StringDtype(na_value=np.nan)
      else:
          str_dtype = np.dtype(objectname)

      # single item
      df = DataFrame(
          {
              "A": 1,
              "B": "foo",
              "C": "bar",
              "D": Timestamp("20010101"),
              "E": datetime(2001, 1, 2, 0, 0),
          },
          index=np.arange(10),
      )
      expected = Series(
          [
              np.dtype("int64"),
              str_dtype,
              str_dtype,
              np.dtype("M8[s]"),
              np.dtype("M8[us]"),
          ],
          index=list("ABCDE"),
      )
      tm.assert_series_equal(df.dtypes, expected)

      # the two ndarray cases below share the same expected dtypes
      numeric_expected = Series(
          [
              np.dtype("float64"),
              np.dtype("int64"),
              str_dtype,
              np.dtype("float64"),
              np.dtype(intname),
          ],
          index=["a", "b", "c", floatname, intname],
      )

      # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0
      # ndarray with a dtype specified)
      df = DataFrame(
          {
              "a": 1.0,
              "b": 2,
              "c": "foo",
              floatname: np.array(1.0, dtype=floatname),
              intname: np.array(1, dtype=intname),
          },
          index=np.arange(10),
      )
      tm.assert_series_equal(df.dtypes, numeric_expected)

      # check with ndarray construction ndim>0
      df = DataFrame(
          {
              "a": 1.0,
              "b": 2,
              "c": "foo",
              floatname: np.array([1.0] * 10, dtype=floatname),
              intname: np.array([1] * 10, dtype=intname),
          },
          index=np.arange(10),
      )
      tm.assert_series_equal(df.dtypes, numeric_expected)
  1628. def test_constructor_with_datetimes1(self):
  1629. # GH 2809
  1630. ind = date_range(start="2000-01-01", freq="D", periods=10)
  1631. datetimes = [ts.to_pydatetime() for ts in ind]
  1632. datetime_s = Series(datetimes)
  1633. assert datetime_s.dtype == "M8[ns]"
  1634. def test_constructor_with_datetimes2(self):
  1635. # GH 2810
  1636. ind = date_range(start="2000-01-01", freq="D", periods=10)
  1637. datetimes = [ts.to_pydatetime() for ts in ind]
  1638. dates = [ts.date() for ts in ind]
  1639. df = DataFrame(datetimes, columns=["datetimes"])
  1640. df["dates"] = dates
  1641. result = df.dtypes
  1642. expected = Series(
  1643. [np.dtype("datetime64[ns]"), np.dtype("object")],
  1644. index=["datetimes", "dates"],
  1645. )
  1646. tm.assert_series_equal(result, expected)
  def test_constructor_with_datetimes3(self):
      """A tz-aware datetime keeps its zone and is not coerced (GH 7594)."""
      eastern = pytz.timezone("US/Eastern")
      dt = eastern.localize(datetime(2012, 1, 1))

      # The two construction paths are checked against different units.
      cases = [
          (DataFrame({"End Date": dt}, index=[0]), "datetime64[us, US/Eastern]"),
          (DataFrame([{"End Date": dt}]), "datetime64[ns, US/Eastern]"),
      ]
      for frame, dtype_str in cases:
          assert frame.iat[0, 0] == dt
          tm.assert_series_equal(
              frame.dtypes, Series({"End Date": dtype_str}, dtype=object)
          )
  1662. def test_constructor_with_datetimes4(self):
  1663. # tz-aware (UTC and other tz's)
  1664. # GH 8411
  1665. dr = date_range("20130101", periods=3)
  1666. df = DataFrame({"value": dr})
  1667. assert df.iat[0, 0].tz is None
  1668. dr = date_range("20130101", periods=3, tz="UTC")
  1669. df = DataFrame({"value": dr})
  1670. assert str(df.iat[0, 0].tz) == "UTC"
  1671. dr = date_range("20130101", periods=3, tz="US/Eastern")
  1672. df = DataFrame({"value": dr})
  1673. assert str(df.iat[0, 0].tz) == "US/Eastern"
  1674. def test_constructor_with_datetimes5(self):
  1675. # GH 7822
  1676. # preserver an index with a tz on dict construction
  1677. i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern")
  1678. expected = DataFrame({"a": i.to_series().reset_index(drop=True)})
  1679. df = DataFrame()
  1680. df["a"] = i
  1681. tm.assert_frame_equal(df, expected)
  1682. df = DataFrame({"a": i})
  1683. tm.assert_frame_equal(df, expected)
  1684. def test_constructor_with_datetimes6(self):
  1685. # multiples
  1686. i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern")
  1687. i_no_tz = date_range("1/1/2011", periods=5, freq="10s")
  1688. df = DataFrame({"a": i, "b": i_no_tz})
  1689. expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz})
  1690. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "arr",
      [
          np.array([None, None, None, None, datetime.now(), None]),
          np.array([None, None, datetime.now(), None]),
          [[np.datetime64("NaT")], [None]],
          [[np.datetime64("NaT")], [pd.NaT]],
          [[None], [np.datetime64("NaT")]],
          [[None], [pd.NaT]],
          [[pd.NaT], [np.datetime64("NaT")]],
          [[pd.NaT], [None]],
      ],
  )
  def test_constructor_datetimes_with_nulls(self, arr):
      """Datetimes/NaT mixed with None infer datetime64[ns] (gh-15869, GH#11220)."""
      inferred = DataFrame(arr).dtypes
      tm.assert_series_equal(inferred, Series([np.dtype("datetime64[ns]")]))
  @pytest.mark.parametrize("order", ["K", "A", "C", "F"])
  @pytest.mark.parametrize(
      "unit",
      ["M", "D", "h", "m", "s", "ms", "us", "ns"],
  )
  def test_constructor_datetimes_non_ns(self, order, unit):
      """datetime64 input in each numpy unit and memory order, checked by dtype."""
      dtype = f"datetime64[{unit}]"
      values = np.array(
          [
              ["2015-01-01", "2015-01-02", "2015-01-03"],
              ["2017-01-01", "2017-01-02", "2017-02-03"],
          ],
          dtype=dtype,
          order=order,
      )
      result = DataFrame(values)

      expected = DataFrame(values.astype("M8[ns]"))
      if unit not in ["M", "D", "h", "m"]:
          expected = expected.astype(dtype=dtype)
      else:
          # astype to these units raises ...
          with pytest.raises(TypeError, match="Cannot cast"):
              expected.astype(dtype)

          # instead the constructor casts to the closest supported reso, i.e. "s"
          expected = expected.astype("datetime64[s]")

      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("order", ["K", "A", "C", "F"])
  @pytest.mark.parametrize(
      "unit",
      [
          "D",
          "h",
          "m",
          "s",
          "ms",
          "us",
          "ns",
      ],
  )
  def test_constructor_timedelta_non_ns(self, order, unit):
      """timedelta64 input in each numpy unit and memory order, checked by dtype."""
      dtype = f"timedelta64[{unit}]"
      values = np.array(
          [
              [np.timedelta64(1, "D"), np.timedelta64(2, "D")],
              [np.timedelta64(4, "D"), np.timedelta64(5, "D")],
          ],
          dtype=dtype,
          order=order,
      )
      result = DataFrame(values)

      # for "D", "h" and "m" we get the nearest supported unit, i.e. "s"
      exp_unit = "s" if unit in ["D", "h", "m"] else unit
      exp_dtype = np.dtype(f"m8[{exp_unit}]")

      # TODO(2.0): ideally we should get the same 'expected' without passing
      #  dtype=exp_dtype.
      expected = DataFrame(
          [
              [Timedelta(1, "D"), Timedelta(2, "D")],
              [Timedelta(4, "D"), Timedelta(5, "D")],
          ],
          dtype=exp_dtype,
      )
      tm.assert_frame_equal(result, expected)
  def test_constructor_for_list_with_dtypes(self, using_infer_string):
      """Inferred column dtypes for lists, ndarrays, scalars and mixed columns."""
      # test list of lists/ndarrays
      df = DataFrame([np.arange(5) for _ in range(5)])
      tm.assert_series_equal(df.dtypes, Series([np.dtype("int")] * 5))

      df = DataFrame([np.array(np.arange(5), dtype="int32") for _ in range(5)])
      tm.assert_series_equal(df.dtypes, Series([np.dtype("int32")] * 5))

      # overflow issue? (we always expected int64 upcasting here)
      df = DataFrame({"a": [2**31, 2**31 + 1]})
      assert df.dtypes.iloc[0] == np.dtype("int64")

      # GH #2751 (construction with no index specified), make sure we cast to
      # platform values
      simple_cases = [
          ([1, 2], {}, "int64"),
          ([1.0, 2.0], {}, "float64"),
          ({"a": [1, 2]}, {}, "int64"),
          ({"a": [1.0, 2.0]}, {}, "float64"),
          ({"a": 1}, {"index": range(3)}, "int64"),
          ({"a": 1.0}, {"index": range(3)}, "float64"),
      ]
      for data, kwargs, dtype_name in simple_cases:
          df = DataFrame(data, **kwargs)
          assert df.dtypes.iloc[0] == np.dtype(dtype_name)

      # with object list
      df = DataFrame(
          {
              "a": [1, 2, 4, 7],
              "b": [1.2, 2.3, 5.1, 6.3],
              "c": list("abcd"),
              "d": [datetime(2000, 1, 1) for _ in range(4)],
              "e": [1.0, 2, 4.0, 7],
          }
      )
      if using_infer_string:
          str_dtype = pd.StringDtype(na_value=np.nan)
      else:
          str_dtype = np.dtype("object")
      expected = Series(
          [
              np.dtype("int64"),
              np.dtype("float64"),
              str_dtype,
              np.dtype("datetime64[ns]"),
              np.dtype("float64"),
          ],
          index=list("abcde"),
      )
      tm.assert_series_equal(df.dtypes, expected)
  def test_constructor_frame_copy(self, float_frame):
      """With ``copy=True``, writing to the new frame leaves the source untouched."""
      duplicate = DataFrame(float_frame, copy=True)
      duplicate["A"] = 5

      source_is_all_five = (float_frame["A"] == 5).all()
      assert (duplicate["A"] == 5).all()
      assert not source_is_all_five
  def test_constructor_frame_shallow_copy(self, float_frame):
      """DataFrame(df) gets its own manager, so changing its index is local.

      constructing a DataFrame from DataFrame with copy=False should still
      give a "shallow" copy (share data, not attributes)
      https://github.com/pandas-dev/pandas/issues/49523
      """
      snapshot = float_frame.copy()
      shallow = DataFrame(float_frame)
      assert shallow._mgr is not float_frame._mgr

      # Overwriting index of copy doesn't change original
      shallow.index = np.arange(len(shallow))
      tm.assert_frame_equal(float_frame, snapshot)
  def test_constructor_ndarray_copy(
      self, float_frame, using_array_manager, using_copy_on_write
  ):
      """Whether writes to the source ndarray show up in the frame, per mode."""
      arr = float_frame.values.copy()

      if using_array_manager:
          # default: copy to ensure contiguous arrays
          df = DataFrame(arr)
          assert df._mgr.arrays[0].flags.c_contiguous
          arr[0, 0] = 100
          assert df.iloc[0, 0] != 100

          # manually specify copy=False
          df = DataFrame(arr, copy=False)
          assert not df._mgr.arrays[0].flags.c_contiguous
          arr[0, 0] = 1000
          assert df.iloc[0, 0] == 1000
          return

      # no explicit copy: the write shows up in the frame unless CoW is on
      df = DataFrame(arr)
      arr[5] = 5
      if using_copy_on_write:
          assert not (df.values[5] == 5).all()
      else:
          assert (df.values[5] == 5).all()

      # explicit copy: the frame never sees later writes to the array
      df = DataFrame(arr, copy=True)
      arr[6] = 6
      assert not (df.values[6] == 6).all()
  def test_constructor_series_copy(self, float_frame):
      """With ``copy=True``, writing to the frame does not touch the source Series."""
      source = float_frame._series
      df = DataFrame({"A": source["A"]}, copy=True)

      # TODO can be replaced with `df.loc[:, "A"] = 5` after deprecation about
      # inplace mutation is enforced
      first_label, last_label = df.index[0], df.index[-1]
      df.loc[first_label:last_label, "A"] = 5

      assert not (source["A"] == 5).all()
  @pytest.mark.parametrize(
      "df",
      [
          DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]),
          DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]),
          DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]),
          DataFrame(
              [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]
          ),
          DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]),
      ],
  )
  def test_constructor_with_nas(self, df):
      """NaN labels in index/columns (GH 5016), incl. non-unique columns (GH 21428)."""
      n_columns = len(df.columns)

      # every column must be reachable by position
      for position in range(n_columns):
          df.iloc[:, position]

      nan_positions = np.arange(n_columns)[isna(df.columns)]
      n_nan = len(nan_positions)

      if n_nan == 0:
          # No NaN found -> error
          with pytest.raises(KeyError, match="^nan$"):
              df.loc[:, np.nan]
      elif n_nan == 1:
          # single nan should result in Series
          tm.assert_series_equal(df.iloc[:, nan_positions[0]], df.loc[:, np.nan])
      else:
          # multiple nans should result in DataFrame
          tm.assert_frame_equal(df.iloc[:, nan_positions], df.loc[:, np.nan])
  1902. def test_constructor_lists_to_object_dtype(self):
  1903. # from #1074
  1904. d = DataFrame({"a": [np.nan, False]})
  1905. assert d["a"].dtype == np.object_
  1906. assert not d["a"][1]
  1907. def test_constructor_ndarray_categorical_dtype(self):
  1908. cat = Categorical(["A", "B", "C"])
  1909. arr = np.array(cat).reshape(-1, 1)
  1910. arr = np.broadcast_to(arr, (3, 4))
  1911. result = DataFrame(arr, dtype=cat.dtype)
  1912. expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat})
  1913. tm.assert_frame_equal(result, expected)
  1914. def test_constructor_categorical(self):
  1915. # GH8626
  1916. # dict creation
  1917. df = DataFrame({"A": list("abc")}, dtype="category")
  1918. expected = Series(list("abc"), dtype="category", name="A")
  1919. tm.assert_series_equal(df["A"], expected)
  1920. # to_frame
  1921. s = Series(list("abc"), dtype="category")
  1922. result = s.to_frame()
  1923. expected = Series(list("abc"), dtype="category", name=0)
  1924. tm.assert_series_equal(result[0], expected)
  1925. result = s.to_frame(name="foo")
  1926. expected = Series(list("abc"), dtype="category", name="foo")
  1927. tm.assert_series_equal(result["foo"], expected)
  1928. # list-like creation
  1929. df = DataFrame(list("abc"), dtype="category")
  1930. expected = Series(list("abc"), dtype="category", name=0)
  1931. tm.assert_series_equal(df[0], expected)
  1932. def test_construct_from_1item_list_of_categorical(self):
  1933. # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove
  1934. # Categorical special case
  1935. # ndim != 1
  1936. cat = Categorical(list("abc"))
  1937. df = DataFrame([cat])
  1938. expected = DataFrame([cat.astype(object)])
  1939. tm.assert_frame_equal(df, expected)
  1940. def test_construct_from_list_of_categoricals(self):
  1941. # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove
  1942. # Categorical special case
  1943. df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
  1944. expected = DataFrame([["a", "b", "c"], ["a", "b", "d"]])
  1945. tm.assert_frame_equal(df, expected)
  1946. def test_from_nested_listlike_mixed_types(self):
  1947. # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove
  1948. # Categorical special case
  1949. # mixed
  1950. df = DataFrame([Categorical(list("abc")), list("def")])
  1951. expected = DataFrame([["a", "b", "c"], ["d", "e", "f"]])
  1952. tm.assert_frame_equal(df, expected)
  1953. def test_construct_from_listlikes_mismatched_lengths(self):
  1954. df = DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
  1955. expected = DataFrame([list("abc"), list("abdefg")])
  1956. tm.assert_frame_equal(df, expected)
  1957. def test_constructor_categorical_series(self):
  1958. items = [1, 2, 3, 1]
  1959. exp = Series(items).astype("category")
  1960. res = Series(items, dtype="category")
  1961. tm.assert_series_equal(res, exp)
  1962. items = ["a", "b", "c", "a"]
  1963. exp = Series(items).astype("category")
  1964. res = Series(items, dtype="category")
  1965. tm.assert_series_equal(res, exp)
  1966. # insert into frame with different index
  1967. # GH 8076
  1968. index = date_range("20000101", periods=3)
  1969. expected = Series(
  1970. Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
  1971. )
  1972. expected.index = index
  1973. expected = DataFrame({"x": expected})
  1974. df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index)
  1975. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "dtype",
      tm.ALL_NUMERIC_DTYPES
      + tm.DATETIME64_DTYPES
      + tm.TIMEDELTA64_DTYPES
      + tm.BOOL_DTYPES,
  )
  def test_check_dtype_empty_numeric_column(self, dtype):
      """An empty column keeps the requested dtype (GH24386)."""
      # The dict only provides "a" while "b" is requested, so the resulting
      # column "b" holds no data.
      frame = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)

      assert frame.b.dtype == dtype
  @pytest.mark.parametrize(
      "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES
  )
  def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager):
      """An empty column requested with a string-like dtype is object (GH24386)."""
      # The dict only provides "a" while "b" is requested, so the resulting
      # column "b" holds no data.
      frame = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)

      is_bytes_dtype = dtype in tm.BYTES_DTYPES
      if using_array_manager and is_bytes_dtype:
          # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype
          td.mark_array_manager_not_yet_implemented(request)

      assert frame.b.dtype.name == "object"
  1999. def test_to_frame_with_falsey_names(self):
  2000. # GH 16114
  2001. result = Series(name=0, dtype=object).to_frame().dtypes
  2002. expected = Series({0: object})
  2003. tm.assert_series_equal(result, expected)
  2004. result = DataFrame(Series(name=0, dtype=object)).dtypes
  2005. tm.assert_series_equal(result, expected)
  2006. @pytest.mark.arm_slow
  2007. @pytest.mark.parametrize("dtype", [None, "uint8", "category"])
  2008. def test_constructor_range_dtype(self, dtype):
  2009. expected = DataFrame({"A": [0, 1, 2, 3, 4]}, dtype=dtype or "int64")
  2010. # GH 26342
  2011. result = DataFrame(range(5), columns=["A"], dtype=dtype)
  2012. tm.assert_frame_equal(result, expected)
  2013. # GH 16804
  2014. result = DataFrame({"A": range(5)}, dtype=dtype)
  2015. tm.assert_frame_equal(result, expected)
  2016. def test_frame_from_list_subclass(self):
  2017. # GH21226
  2018. class List(list):
  2019. pass
  2020. expected = DataFrame([[1, 2, 3], [4, 5, 6]])
  2021. result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])]))
  2022. tm.assert_frame_equal(result, expected)
  2023. @pytest.mark.parametrize(
  2024. "extension_arr",
  2025. [
  2026. Categorical(list("aabbc")),
  2027. SparseArray([1, np.nan, np.nan, np.nan]),
  2028. IntervalArray([Interval(0, 1), Interval(1, 5)]),
  2029. PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")),
  2030. ],
  2031. )
  2032. def test_constructor_with_extension_array(self, extension_arr):
  2033. # GH11363
  2034. expected = DataFrame(Series(extension_arr))
  2035. result = DataFrame(extension_arr)
  2036. tm.assert_frame_equal(result, expected)
  2037. def test_datetime_date_tuple_columns_from_dict(self):
  2038. # GH 10863
  2039. v = date.today()
  2040. tup = v, v
  2041. result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
  2042. expected = DataFrame([0, 1, 2], columns=Index(Series([tup])))
  2043. tm.assert_frame_equal(result, expected)
  2044. def test_construct_with_two_categoricalindex_series(self):
  2045. # GH 14600
  2046. s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"]))
  2047. s2 = Series(
  2048. [2, 152, 2, 242, 150],
  2049. index=CategoricalIndex(["f", "female", "m", "male", "unknown"]),
  2050. )
  2051. result = DataFrame([s1, s2])
  2052. expected = DataFrame(
  2053. np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]),
  2054. columns=["female", "male", "unknown", "f", "m"],
  2055. )
  2056. tm.assert_frame_equal(result, expected)
  2057. @pytest.mark.filterwarnings(
  2058. "ignore:invalid value encountered in cast:RuntimeWarning"
  2059. )
  2060. def test_constructor_series_nonexact_categoricalindex(self):
  2061. # GH 42424
  2062. ser = Series(range(100))
  2063. ser1 = cut(ser, 10).value_counts().head(5)
  2064. ser2 = cut(ser, 10).value_counts().tail(5)
  2065. result = DataFrame({"1": ser1, "2": ser2})
  2066. index = CategoricalIndex(
  2067. [
  2068. Interval(-0.099, 9.9, closed="right"),
  2069. Interval(9.9, 19.8, closed="right"),
  2070. Interval(19.8, 29.7, closed="right"),
  2071. Interval(29.7, 39.6, closed="right"),
  2072. Interval(39.6, 49.5, closed="right"),
  2073. Interval(49.5, 59.4, closed="right"),
  2074. Interval(59.4, 69.3, closed="right"),
  2075. Interval(69.3, 79.2, closed="right"),
  2076. Interval(79.2, 89.1, closed="right"),
  2077. Interval(89.1, 99, closed="right"),
  2078. ],
  2079. ordered=True,
  2080. )
  2081. expected = DataFrame(
  2082. {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index
  2083. )
  2084. tm.assert_frame_equal(expected, result)
  2085. def test_from_M8_structured(self):
  2086. dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
  2087. arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")])
  2088. df = DataFrame(arr)
  2089. assert df["Date"][0] == dates[0][0]
  2090. assert df["Forecasting"][0] == dates[0][1]
  2091. s = Series(arr["Date"])
  2092. assert isinstance(s[0], Timestamp)
  2093. assert s[0] == dates[0][0]
  2094. def test_from_datetime_subclass(self):
  2095. # GH21142 Verify whether Datetime subclasses are also of dtype datetime
  2096. class DatetimeSubclass(datetime):
  2097. pass
  2098. data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]})
  2099. assert data.datetime.dtype == "datetime64[ns]"
  2100. def test_with_mismatched_index_length_raises(self):
  2101. # GH#33437
  2102. dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
  2103. msg = "Shape of passed values|Passed arrays should have the same length"
  2104. with pytest.raises(ValueError, match=msg):
  2105. DataFrame(dti, index=range(4))
  2106. def test_frame_ctor_datetime64_column(self):
  2107. rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
  2108. dates = np.asarray(rng)
  2109. df = DataFrame(
  2110. {"A": np.random.default_rng(2).standard_normal(len(rng)), "B": dates}
  2111. )
  2112. assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]"))
  2113. def test_dataframe_constructor_infer_multiindex(self):
  2114. index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]]
  2115. multi = DataFrame(
  2116. np.random.default_rng(2).standard_normal((4, 4)),
  2117. index=[np.array(x) for x in index_lists],
  2118. )
  2119. assert isinstance(multi.index, MultiIndex)
  2120. assert not isinstance(multi.columns, MultiIndex)
  2121. multi = DataFrame(
  2122. np.random.default_rng(2).standard_normal((4, 4)), columns=index_lists
  2123. )
  2124. assert isinstance(multi.columns, MultiIndex)
  2125. @pytest.mark.parametrize(
  2126. "input_vals",
  2127. [
  2128. ([1, 2]),
  2129. (["1", "2"]),
  2130. (list(date_range("1/1/2011", periods=2, freq="h"))),
  2131. (list(date_range("1/1/2011", periods=2, freq="h", tz="US/Eastern"))),
  2132. ([Interval(left=0, right=5)]),
  2133. ],
  2134. )
  2135. def test_constructor_list_str(self, input_vals, string_dtype):
  2136. # GH#16605
  2137. # Ensure that data elements are converted to strings when
  2138. # dtype is str, 'str', or 'U'
  2139. result = DataFrame({"A": input_vals}, dtype=string_dtype)
  2140. expected = DataFrame({"A": input_vals}).astype({"A": string_dtype})
  2141. tm.assert_frame_equal(result, expected)
  2142. def test_constructor_list_str_na(self, string_dtype):
  2143. result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype)
  2144. expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
  2145. tm.assert_frame_equal(result, expected)
    @pytest.mark.parametrize("copy", [False, True])
    def test_dict_nocopy(
        self,
        request,
        copy,
        any_numeric_ea_dtype,
        any_numpy_dtype,
        using_array_manager,
        using_copy_on_write,
    ):
        """
        Check whether ``DataFrame(dict, copy=copy)`` shares memory with its inputs.

        Builds a frame from two NumPy arrays (``a``, ``b``) and one array made
        with ``pd.array`` (``c``), then writes through ``iloc`` and inspects
        both the manager's arrays and the original inputs.
        """
        if (
            using_array_manager
            and not copy
            and any_numpy_dtype not in tm.STRING_DTYPES + tm.BYTES_DTYPES
        ):
            # TODO(ArrayManager) properly honor copy keyword for dict input
            td.mark_array_manager_not_yet_implemented(request)

        a = np.array([1, 2], dtype=any_numpy_dtype)
        b = np.array([3, 4], dtype=any_numpy_dtype)
        if b.dtype.kind in ["S", "U"]:
            # These get cast, making the checks below more cumbersome
            pytest.skip(f"{b.dtype} get cast, making the checks below more cumbersome")

        c = pd.array([1, 2], dtype=any_numeric_ea_dtype)
        # Snapshot of c, compared against below in the copy=True branch.
        c_orig = c.copy()
        df = DataFrame({"a": a, "b": b, "c": c}, copy=copy)

        def get_base(obj):
            # Return the ndarray that ``obj`` is a view of (its ``.base``).
            if isinstance(obj, np.ndarray):
                return obj.base
            elif isinstance(obj.dtype, np.dtype):
                # i.e. DatetimeArray, TimedeltaArray
                return obj._ndarray.base
            else:
                raise TypeError

        def check_views(c_only: bool = False):
            # Assert that df still holds the original inputs; with
            # c_only=True only the identity of `c` is checked.
            # written to work for either BlockManager or ArrayManager
            # Check that the underlying data behind df["c"] is still `c`
            # after setting with iloc. Since we don't know which entry in
            # df._mgr.arrays corresponds to df["c"], we just check that exactly
            # one of these arrays is `c`. GH#38939
            assert sum(x is c for x in df._mgr.arrays) == 1
            if c_only:
                # If we ever stop consolidating in setitem_with_indexer,
                # this will become unnecessary.
                return

            # Exactly one numpy-dtype array in the manager is a view on `a` ...
            assert (
                sum(
                    get_base(x) is a
                    for x in df._mgr.arrays
                    if isinstance(x.dtype, np.dtype)
                )
                == 1
            )
            # ... and exactly one is a view on `b`.
            assert (
                sum(
                    get_base(x) is b
                    for x in df._mgr.arrays
                    if isinstance(x.dtype, np.dtype)
                )
                == 1
            )

        if not copy:
            # constructor preserves views
            check_views()

        # TODO: most of the rest of this test belongs in indexing tests
        # Setting 0 is expected to warn about an incompatible dtype unless the
        # first column's dtype kind is one of "fciuO".
        if lib.is_np_dtype(df.dtypes.iloc[0], "fciuO"):
            warn = None
        else:
            warn = FutureWarning
        with tm.assert_produces_warning(warn, match="incompatible dtype"):
            df.iloc[0, 0] = 0
            df.iloc[0, 1] = 0
        if not copy:
            check_views(True)

        # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve
        # view, so we have to check in the other direction
        df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype)
        assert df.dtypes.iloc[2] == c.dtype
        if not copy and not using_copy_on_write:
            check_views(True)

        if copy:
            # With copy=True the writes above must not have reached the inputs.
            if a.dtype.kind == "M":
                assert a[0] == a.dtype.type(1, "ns")
                assert b[0] == b.dtype.type(3, "ns")
            else:
                assert a[0] == a.dtype.type(1)
                assert b[0] == b.dtype.type(3)
            # FIXME(GH#35417): enable after GH#35417
            assert c[0] == c_orig[0]  # i.e. df.iloc[0, 2]=45 did *not* update c
        elif not using_copy_on_write:
            # TODO: we can call check_views if we stop consolidating
            # in setitem_with_indexer
            assert c[0] == 45  # i.e. df.iloc[0, 2]=45 *did* update c
            # TODO: we can check b[0] == 0 if we stop consolidating in
            # setitem_with_indexer (except for datetimelike?)
  2240. def test_construct_from_dict_ea_series(self):
  2241. # GH#53744 - default of copy=True should also apply for Series with
  2242. # extension dtype
  2243. ser = Series([1, 2, 3], dtype="Int64")
  2244. df = DataFrame({"a": ser})
  2245. assert not np.shares_memory(ser.values._data, df["a"].values._data)
  2246. def test_from_series_with_name_with_columns(self):
  2247. # GH 7893
  2248. result = DataFrame(Series(1, name="foo"), columns=["bar"])
  2249. expected = DataFrame(columns=["bar"])
  2250. tm.assert_frame_equal(result, expected)
  2251. def test_nested_list_columns(self):
  2252. # GH 14467
  2253. result = DataFrame(
  2254. [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
  2255. )
  2256. expected = DataFrame(
  2257. [[1, 2, 3], [4, 5, 6]],
  2258. columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
  2259. )
  2260. tm.assert_frame_equal(result, expected)
  2261. def test_from_2d_object_array_of_periods_or_intervals(self):
  2262. # Period analogue to GH#26825
  2263. pi = pd.period_range("2016-04-05", periods=3)
  2264. data = pi._data.astype(object).reshape(1, -1)
  2265. df = DataFrame(data)
  2266. assert df.shape == (1, 3)
  2267. assert (df.dtypes == pi.dtype).all()
  2268. assert (df == pi).all().all()
  2269. ii = pd.IntervalIndex.from_breaks([3, 4, 5, 6])
  2270. data2 = ii._data.astype(object).reshape(1, -1)
  2271. df2 = DataFrame(data2)
  2272. assert df2.shape == (1, 3)
  2273. assert (df2.dtypes == ii.dtype).all()
  2274. assert (df2 == ii).all().all()
  2275. # mixed
  2276. data3 = np.r_[data, data2, data, data2].T
  2277. df3 = DataFrame(data3)
  2278. expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii})
  2279. tm.assert_frame_equal(df3, expected)
  2280. @pytest.mark.parametrize(
  2281. "col_a, col_b",
  2282. [
  2283. ([[1], [2]], np.array([[1], [2]])),
  2284. (np.array([[1], [2]]), [[1], [2]]),
  2285. (np.array([[1], [2]]), np.array([[1], [2]])),
  2286. ],
  2287. )
  2288. def test_error_from_2darray(self, col_a, col_b):
  2289. msg = "Per-column arrays must each be 1-dimensional"
  2290. with pytest.raises(ValueError, match=msg):
  2291. DataFrame({"a": col_a, "b": col_b})
  2292. def test_from_dict_with_missing_copy_false(self):
  2293. # GH#45369 filled columns should not be views of one another
  2294. df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False)
  2295. assert not np.shares_memory(df["a"]._values, df["b"]._values)
  2296. df.iloc[0, 0] = 0
  2297. expected = DataFrame(
  2298. {
  2299. "a": [0, np.nan, np.nan],
  2300. "b": [np.nan, np.nan, np.nan],
  2301. "c": [np.nan, np.nan, np.nan],
  2302. },
  2303. index=[1, 2, 3],
  2304. dtype=object,
  2305. )
  2306. tm.assert_frame_equal(df, expected)
  2307. def test_construction_empty_array_multi_column_raises(self):
  2308. # GH#46822
  2309. msg = r"Shape of passed values is \(0, 1\), indices imply \(0, 2\)"
  2310. with pytest.raises(ValueError, match=msg):
  2311. DataFrame(data=np.array([]), columns=["a", "b"])
  2312. def test_construct_with_strings_and_none(self):
  2313. # GH#32218
  2314. df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
  2315. expected = DataFrame({"a": ["1", "2", None]}, dtype="str")
  2316. tm.assert_frame_equal(df, expected)
  2317. def test_frame_string_inference(self):
  2318. # GH#54430
  2319. dtype = pd.StringDtype(na_value=np.nan)
  2320. expected = DataFrame(
  2321. {"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
  2322. )
  2323. with pd.option_context("future.infer_string", True):
  2324. df = DataFrame({"a": ["a", "b"]})
  2325. tm.assert_frame_equal(df, expected)
  2326. expected = DataFrame(
  2327. {"a": ["a", "b"]},
  2328. dtype=dtype,
  2329. columns=Index(["a"], dtype=dtype),
  2330. index=Index(["x", "y"], dtype=dtype),
  2331. )
  2332. with pd.option_context("future.infer_string", True):
  2333. df = DataFrame({"a": ["a", "b"]}, index=["x", "y"])
  2334. tm.assert_frame_equal(df, expected)
  2335. expected = DataFrame(
  2336. {"a": ["a", 1]}, dtype="object", columns=Index(["a"], dtype=dtype)
  2337. )
  2338. with pd.option_context("future.infer_string", True):
  2339. df = DataFrame({"a": ["a", 1]})
  2340. tm.assert_frame_equal(df, expected)
  2341. expected = DataFrame(
  2342. {"a": ["a", "b"]}, dtype="object", columns=Index(["a"], dtype=dtype)
  2343. )
  2344. with pd.option_context("future.infer_string", True):
  2345. df = DataFrame({"a": ["a", "b"]}, dtype="object")
  2346. tm.assert_frame_equal(df, expected)
  2347. def test_frame_string_inference_array_string_dtype(self):
  2348. # GH#54496
  2349. dtype = pd.StringDtype(na_value=np.nan)
  2350. expected = DataFrame(
  2351. {"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
  2352. )
  2353. with pd.option_context("future.infer_string", True):
  2354. df = DataFrame({"a": np.array(["a", "b"])})
  2355. tm.assert_frame_equal(df, expected)
  2356. expected = DataFrame({0: ["a", "b"], 1: ["c", "d"]}, dtype=dtype)
  2357. with pd.option_context("future.infer_string", True):
  2358. df = DataFrame(np.array([["a", "c"], ["b", "d"]]))
  2359. tm.assert_frame_equal(df, expected)
  2360. expected = DataFrame(
  2361. {"a": ["a", "b"], "b": ["c", "d"]},
  2362. dtype=dtype,
  2363. columns=Index(["a", "b"], dtype=dtype),
  2364. )
  2365. with pd.option_context("future.infer_string", True):
  2366. df = DataFrame(np.array([["a", "c"], ["b", "d"]]), columns=["a", "b"])
  2367. tm.assert_frame_equal(df, expected)
  2368. def test_frame_string_inference_block_dim(self):
  2369. # GH#55363
  2370. with pd.option_context("future.infer_string", True):
  2371. df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
  2372. assert df._mgr.blocks[0].ndim == 2
  2373. def test_inference_on_pandas_objects(self):
  2374. # GH#56012
  2375. idx = Index([Timestamp("2019-12-31")], dtype=object)
  2376. with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
  2377. result = DataFrame(idx, columns=["a"])
  2378. assert result.dtypes.iloc[0] != np.object_
  2379. result = DataFrame({"a": idx})
  2380. assert result.dtypes.iloc[0] == np.object_
  2381. ser = Series([Timestamp("2019-12-31")], dtype=object)
  2382. with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
  2383. result = DataFrame(ser, columns=["a"])
  2384. assert result.dtypes.iloc[0] != np.object_
  2385. result = DataFrame({"a": ser})
  2386. assert result.dtypes.iloc[0] == np.object_
  2387. class TestDataFrameConstructorIndexInference:
  2388. def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
  2389. rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M")
  2390. s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)
  2391. rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M")
  2392. s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
  2393. df = DataFrame({"s1": s1, "s2": s2})
  2394. exp = pd.period_range("1/1/1980", "1/1/2012", freq="M")
  2395. tm.assert_index_equal(df.index, exp)
  2396. def test_frame_from_dict_with_mixed_tzaware_indexes(self):
  2397. # GH#44091
  2398. dti = date_range("2016-01-01", periods=3)
  2399. ser1 = Series(range(3), index=dti)
  2400. ser2 = Series(range(3), index=dti.tz_localize("UTC"))
  2401. ser3 = Series(range(3), index=dti.tz_localize("US/Central"))
  2402. ser4 = Series(range(3))
  2403. # no tz-naive, but we do have mixed tzs and a non-DTI
  2404. df1 = DataFrame({"A": ser2, "B": ser3, "C": ser4})
  2405. exp_index = Index(
  2406. list(ser2.index) + list(ser3.index) + list(ser4.index), dtype=object
  2407. )
  2408. tm.assert_index_equal(df1.index, exp_index)
  2409. df2 = DataFrame({"A": ser2, "C": ser4, "B": ser3})
  2410. exp_index3 = Index(
  2411. list(ser2.index) + list(ser4.index) + list(ser3.index), dtype=object
  2412. )
  2413. tm.assert_index_equal(df2.index, exp_index3)
  2414. df3 = DataFrame({"B": ser3, "A": ser2, "C": ser4})
  2415. exp_index3 = Index(
  2416. list(ser3.index) + list(ser2.index) + list(ser4.index), dtype=object
  2417. )
  2418. tm.assert_index_equal(df3.index, exp_index3)
  2419. df4 = DataFrame({"C": ser4, "B": ser3, "A": ser2})
  2420. exp_index4 = Index(
  2421. list(ser4.index) + list(ser3.index) + list(ser2.index), dtype=object
  2422. )
  2423. tm.assert_index_equal(df4.index, exp_index4)
  2424. # TODO: not clear if these raising is desired (no extant tests),
  2425. # but this is de facto behavior 2021-12-22
  2426. msg = "Cannot join tz-naive with tz-aware DatetimeIndex"
  2427. with pytest.raises(TypeError, match=msg):
  2428. DataFrame({"A": ser2, "B": ser3, "C": ser4, "D": ser1})
  2429. with pytest.raises(TypeError, match=msg):
  2430. DataFrame({"A": ser2, "B": ser3, "D": ser1})
  2431. with pytest.raises(TypeError, match=msg):
  2432. DataFrame({"D": ser1, "A": ser2, "B": ser3})
  2433. @pytest.mark.parametrize(
  2434. "key_val, col_vals, col_type",
  2435. [
  2436. ["3", ["3", "4"], "utf8"],
  2437. [3, [3, 4], "int8"],
  2438. ],
  2439. )
  2440. def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
  2441. # GH 53617
  2442. pa = pytest.importorskip("pyarrow")
  2443. cols = pd.arrays.ArrowExtensionArray(
  2444. pa.array(col_vals, type=pa.dictionary(pa.int8(), getattr(pa, col_type)()))
  2445. )
  2446. result = DataFrame({key_val: [1, 2]}, columns=cols)
  2447. expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
  2448. expected.isetitem(1, expected.iloc[:, 1].astype(object))
  2449. tm.assert_frame_equal(result, expected)
  2450. class TestDataFrameConstructorWithDtypeCoercion:
  2451. def test_floating_values_integer_dtype(self):
  2452. # GH#40110 make DataFrame behavior with arraylike floating data and
  2453. # inty dtype match Series behavior
  2454. arr = np.random.default_rng(2).standard_normal((10, 5))
  2455. # GH#49599 in 2.0 we raise instead of either
  2456. # a) silently ignoring dtype and returningfloat (the old Series behavior) or
  2457. # b) rounding (the old DataFrame behavior)
  2458. msg = "Trying to coerce float values to integers"
  2459. with pytest.raises(ValueError, match=msg):
  2460. DataFrame(arr, dtype="i8")
  2461. df = DataFrame(arr.round(), dtype="i8")
  2462. assert (df.dtypes == "i8").all()
  2463. # with NaNs, we go through a different path with a different warning
  2464. arr[0, 0] = np.nan
  2465. msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
  2466. with pytest.raises(IntCastingNaNError, match=msg):
  2467. DataFrame(arr, dtype="i8")
  2468. with pytest.raises(IntCastingNaNError, match=msg):
  2469. Series(arr[0], dtype="i8")
  2470. # The future (raising) behavior matches what we would get via astype:
  2471. msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
  2472. with pytest.raises(IntCastingNaNError, match=msg):
  2473. DataFrame(arr).astype("i8")
  2474. with pytest.raises(IntCastingNaNError, match=msg):
  2475. Series(arr[0]).astype("i8")
  2476. class TestDataFrameConstructorWithDatetimeTZ:
  2477. @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
  2478. def test_construction_preserves_tzaware_dtypes(self, tz):
  2479. # after GH#7822
  2480. # these retain the timezones on dict construction
  2481. dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
  2482. dr_tz = dr.tz_localize(tz)
  2483. df = DataFrame({"A": "foo", "B": dr_tz}, index=dr)
  2484. tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo)
  2485. assert df["B"].dtype == tz_expected
  2486. # GH#2810 (with timezones)
  2487. datetimes_naive = [ts.to_pydatetime() for ts in dr]
  2488. datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz]
  2489. df = DataFrame({"dr": dr})
  2490. df["dr_tz"] = dr_tz
  2491. df["datetimes_naive"] = datetimes_naive
  2492. df["datetimes_with_tz"] = datetimes_with_tz
  2493. result = df.dtypes
  2494. expected = Series(
  2495. [
  2496. np.dtype("datetime64[ns]"),
  2497. DatetimeTZDtype(tz=tz),
  2498. np.dtype("datetime64[ns]"),
  2499. DatetimeTZDtype(tz=tz),
  2500. ],
  2501. index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"],
  2502. )
  2503. tm.assert_series_equal(result, expected)
  2504. @pytest.mark.parametrize("pydt", [True, False])
  2505. def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt):
  2506. # GH#25843, GH#41555, GH#33401
  2507. tz = tz_aware_fixture
  2508. ts = Timestamp("2019", tz=tz)
  2509. if pydt:
  2510. ts = ts.to_pydatetime()
  2511. msg = (
  2512. "Cannot convert timezone-aware data to timezone-naive dtype. "
  2513. r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
  2514. )
  2515. with pytest.raises(ValueError, match=msg):
  2516. DataFrame({0: [ts]}, dtype="datetime64[ns]")
  2517. msg2 = "Cannot unbox tzaware Timestamp to tznaive dtype"
  2518. with pytest.raises(TypeError, match=msg2):
  2519. DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
  2520. with pytest.raises(ValueError, match=msg):
  2521. DataFrame([ts], dtype="datetime64[ns]")
  2522. with pytest.raises(ValueError, match=msg):
  2523. DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
  2524. with pytest.raises(TypeError, match=msg2):
  2525. DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
  2526. with pytest.raises(ValueError, match=msg):
  2527. DataFrame([Series([ts])], dtype="datetime64[ns]")
  2528. with pytest.raises(ValueError, match=msg):
  2529. DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
  2530. def test_from_dict(self):
  2531. # 8260
  2532. # support datetime64 with tz
  2533. idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
  2534. dr = date_range("20130110", periods=3)
  2535. # construction
  2536. df = DataFrame({"A": idx, "B": dr})
  2537. assert df["A"].dtype, "M8[ns, US/Eastern"
  2538. assert df["A"].name == "A"
  2539. tm.assert_series_equal(df["A"], Series(idx, name="A"))
  2540. tm.assert_series_equal(df["B"], Series(dr, name="B"))
  2541. def test_from_index(self):
  2542. # from index
  2543. idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
  2544. df2 = DataFrame(idx2)
  2545. tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
  2546. df2 = DataFrame(Series(idx2))
  2547. tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
  2548. idx2 = date_range("20130101", periods=3, tz="US/Eastern")
  2549. df2 = DataFrame(idx2)
  2550. tm.assert_series_equal(df2[0], Series(idx2, name=0))
  2551. df2 = DataFrame(Series(idx2))
  2552. tm.assert_series_equal(df2[0], Series(idx2, name=0))
  2553. def test_frame_dict_constructor_datetime64_1680(self):
  2554. dr = date_range("1/1/2012", periods=10)
  2555. s = Series(dr, index=dr)
  2556. # it works!
  2557. DataFrame({"a": "foo", "b": s}, index=dr)
  2558. DataFrame({"a": "foo", "b": s.values}, index=dr)
  2559. def test_frame_datetime64_mixed_index_ctor_1681(self):
  2560. dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
  2561. ts = Series(dr)
  2562. # it works!
  2563. d = DataFrame({"A": "foo", "B": ts}, index=dr)
  2564. assert d["B"].isna().all()
  2565. def test_frame_timeseries_column(self):
  2566. # GH19157
  2567. dr = date_range(
  2568. start="20130101T10:00:00", periods=3, freq="min", tz="US/Eastern"
  2569. )
  2570. result = DataFrame(dr, columns=["timestamps"])
  2571. expected = DataFrame(
  2572. {
  2573. "timestamps": [
  2574. Timestamp("20130101T10:00:00", tz="US/Eastern"),
  2575. Timestamp("20130101T10:01:00", tz="US/Eastern"),
  2576. Timestamp("20130101T10:02:00", tz="US/Eastern"),
  2577. ]
  2578. }
  2579. )
  2580. tm.assert_frame_equal(result, expected)
  2581. def test_nested_dict_construction(self):
  2582. # GH22227
  2583. columns = ["Nevada", "Ohio"]
  2584. pop = {
  2585. "Nevada": {2001: 2.4, 2002: 2.9},
  2586. "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
  2587. }
  2588. result = DataFrame(pop, index=[2001, 2002, 2003], columns=columns)
  2589. expected = DataFrame(
  2590. [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)],
  2591. columns=columns,
  2592. index=Index([2001, 2002, 2003]),
  2593. )
  2594. tm.assert_frame_equal(result, expected)
  2595. def test_from_tzaware_object_array(self):
  2596. # GH#26825 2D object array of tzaware timestamps should not raise
  2597. dti = date_range("2016-04-05 04:30", periods=3, tz="UTC")
  2598. data = dti._data.astype(object).reshape(1, -1)
  2599. df = DataFrame(data)
  2600. assert df.shape == (1, 3)
  2601. assert (df.dtypes == dti.dtype).all()
  2602. assert (df == dti).all().all()
  2603. def test_from_tzaware_mixed_object_array(self):
  2604. # GH#26825
  2605. arr = np.array(
  2606. [
  2607. [
  2608. Timestamp("2013-01-01 00:00:00"),
  2609. Timestamp("2013-01-02 00:00:00"),
  2610. Timestamp("2013-01-03 00:00:00"),
  2611. ],
  2612. [
  2613. Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
  2614. pd.NaT,
  2615. Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
  2616. ],
  2617. [
  2618. Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
  2619. pd.NaT,
  2620. Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
  2621. ],
  2622. ],
  2623. dtype=object,
  2624. ).T
  2625. res = DataFrame(arr, columns=["A", "B", "C"])
  2626. expected_dtypes = [
  2627. "datetime64[ns]",
  2628. "datetime64[ns, US/Eastern]",
  2629. "datetime64[ns, CET]",
  2630. ]
  2631. assert (res.dtypes == expected_dtypes).all()
  2632. def test_from_2d_ndarray_with_dtype(self):
  2633. # GH#12513
  2634. array_dim2 = np.arange(10).reshape((5, 2))
  2635. df = DataFrame(array_dim2, dtype="datetime64[ns, UTC]")
  2636. expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]")
  2637. tm.assert_frame_equal(df, expected)
  2638. @pytest.mark.parametrize("typ", [set, frozenset])
  2639. def test_construction_from_set_raises(self, typ):
  2640. # https://github.com/pandas-dev/pandas/issues/32582
  2641. values = typ({1, 2, 3})
  2642. msg = f"'{typ.__name__}' type is unordered"
  2643. with pytest.raises(TypeError, match=msg):
  2644. DataFrame({"a": values})
  2645. with pytest.raises(TypeError, match=msg):
  2646. Series(values)
  2647. def test_construction_from_ndarray_datetimelike(self):
  2648. # ensure the underlying arrays are properly wrapped as EA when
  2649. # constructed from 2D ndarray
  2650. arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
  2651. df = DataFrame(arr)
  2652. assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays)
  2653. def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
  2654. arr = np.random.default_rng(2).standard_normal((10, 2))
  2655. dtype = pd.array([2.0]).dtype
  2656. msg = r"len\(arrays\) must match len\(columns\)"
  2657. with pytest.raises(ValueError, match=msg):
  2658. DataFrame(arr, columns=["foo"], dtype=dtype)
  2659. arr2 = pd.array([2.0, 3.0, 4.0])
  2660. with pytest.raises(ValueError, match=msg):
  2661. DataFrame(arr2, columns=["foo", "bar"])
  2662. def test_columns_indexes_raise_on_sets(self):
  2663. # GH 47215
  2664. data = [[1, 2, 3], [4, 5, 6]]
  2665. with pytest.raises(ValueError, match="index cannot be a set"):
  2666. DataFrame(data, index={"a", "b"})
  2667. with pytest.raises(ValueError, match="columns cannot be a set"):
  2668. DataFrame(data, columns={"a", "b", "c"})
  2669. # TODO: make this not cast to object in pandas 3.0
  2670. @pytest.mark.skipif(
  2671. not np_version_gt2, reason="StringDType only available in numpy 2 and above"
  2672. )
  2673. @pytest.mark.parametrize(
  2674. "data",
  2675. [
  2676. {"a": ["a", "b", "c"], "b": [1.0, 2.0, 3.0], "c": ["d", "e", "f"]},
  2677. ],
  2678. )
  2679. def test_np_string_array_object_cast(self, data):
  2680. from numpy.dtypes import StringDType
  2681. data["a"] = np.array(data["a"], dtype=StringDType())
  2682. res = DataFrame(data)
  2683. assert res["a"].dtype == np.object_
  2684. assert (res["a"] == data["a"]).all()
  2685. def get1(obj): # TODO: make a helper in tm?
  2686. if isinstance(obj, Series):
  2687. return obj.iloc[0]
  2688. else:
  2689. return obj.iloc[0, 0]
  2690. class TestFromScalar:
  2691. @pytest.fixture(params=[list, dict, None])
  2692. def box(self, request):
  2693. return request.param
  2694. @pytest.fixture
  2695. def constructor(self, frame_or_series, box):
  2696. extra = {"index": range(2)}
  2697. if frame_or_series is DataFrame:
  2698. extra["columns"] = ["A"]
  2699. if box is None:
  2700. return functools.partial(frame_or_series, **extra)
  2701. elif box is dict:
  2702. if frame_or_series is Series:
  2703. return lambda x, **kwargs: frame_or_series(
  2704. {0: x, 1: x}, **extra, **kwargs
  2705. )
  2706. else:
  2707. return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs)
  2708. elif frame_or_series is Series:
  2709. return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs)
  2710. else:
  2711. return lambda x, **kwargs: frame_or_series({"A": [x, x]}, **extra, **kwargs)
  2712. @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
  2713. def test_from_nat_scalar(self, dtype, constructor):
  2714. obj = constructor(pd.NaT, dtype=dtype)
  2715. assert np.all(obj.dtypes == dtype)
  2716. assert np.all(obj.isna())
  2717. def test_from_timedelta_scalar_preserves_nanos(self, constructor):
  2718. td = Timedelta(1)
  2719. obj = constructor(td, dtype="m8[ns]")
  2720. assert get1(obj) == td
  2721. def test_from_timestamp_scalar_preserves_nanos(self, constructor, fixed_now_ts):
  2722. ts = fixed_now_ts + Timedelta(1)
  2723. obj = constructor(ts, dtype="M8[ns]")
  2724. assert get1(obj) == ts
  2725. def test_from_timedelta64_scalar_object(self, constructor):
  2726. td = Timedelta(1)
  2727. td64 = td.to_timedelta64()
  2728. obj = constructor(td64, dtype=object)
  2729. assert isinstance(get1(obj), np.timedelta64)
  2730. @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
  2731. def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
  2732. scalar = cls("NaT", "ns")
  2733. dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]
  2734. if cls is np.datetime64:
  2735. msg1 = "Invalid type for timedelta scalar: <class 'numpy.datetime64'>"
  2736. else:
  2737. msg1 = "<class 'numpy.timedelta64'> is not convertible to datetime"
  2738. msg = "|".join(["Cannot cast", msg1])
  2739. with pytest.raises(TypeError, match=msg):
  2740. constructor(scalar, dtype=dtype)
  2741. scalar = cls(4, "ns")
  2742. with pytest.raises(TypeError, match=msg):
  2743. constructor(scalar, dtype=dtype)
  2744. @pytest.mark.parametrize("cls", [datetime, np.datetime64])
  2745. def test_from_out_of_bounds_ns_datetime(
  2746. self, constructor, cls, request, box, frame_or_series
  2747. ):
  2748. # scalar that won't fit in nanosecond dt64, but will fit in microsecond
  2749. if box is list or (frame_or_series is Series and box is dict):
  2750. mark = pytest.mark.xfail(
  2751. reason="Timestamp constructor has been updated to cast dt64 to "
  2752. "non-nano, but DatetimeArray._from_sequence has not",
  2753. strict=True,
  2754. )
  2755. request.applymarker(mark)
  2756. scalar = datetime(9999, 1, 1)
  2757. exp_dtype = "M8[us]" # pydatetime objects default to this reso
  2758. if cls is np.datetime64:
  2759. scalar = np.datetime64(scalar, "D")
  2760. exp_dtype = "M8[s]" # closest reso to input
  2761. result = constructor(scalar)
  2762. item = get1(result)
  2763. dtype = tm.get_dtype(result)
  2764. assert type(item) is Timestamp
  2765. assert item.asm8.dtype == exp_dtype
  2766. assert dtype == exp_dtype
  2767. @pytest.mark.skip_ubsan
  2768. def test_out_of_s_bounds_datetime64(self, constructor):
  2769. scalar = np.datetime64(np.iinfo(np.int64).max, "D")
  2770. result = constructor(scalar)
  2771. item = get1(result)
  2772. assert type(item) is np.datetime64
  2773. dtype = tm.get_dtype(result)
  2774. assert dtype == object
  2775. @pytest.mark.parametrize("cls", [timedelta, np.timedelta64])
  2776. def test_from_out_of_bounds_ns_timedelta(
  2777. self, constructor, cls, request, box, frame_or_series
  2778. ):
  2779. # scalar that won't fit in nanosecond td64, but will fit in microsecond
  2780. if box is list or (frame_or_series is Series and box is dict):
  2781. mark = pytest.mark.xfail(
  2782. reason="TimedeltaArray constructor has been updated to cast td64 "
  2783. "to non-nano, but TimedeltaArray._from_sequence has not",
  2784. strict=True,
  2785. )
  2786. request.applymarker(mark)
  2787. scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1)
  2788. exp_dtype = "m8[us]" # smallest reso that fits
  2789. if cls is np.timedelta64:
  2790. scalar = np.timedelta64(scalar, "D")
  2791. exp_dtype = "m8[s]" # closest reso to input
  2792. result = constructor(scalar)
  2793. item = get1(result)
  2794. dtype = tm.get_dtype(result)
  2795. assert type(item) is Timedelta
  2796. assert item.asm8.dtype == exp_dtype
  2797. assert dtype == exp_dtype
  2798. @pytest.mark.skip_ubsan
  2799. @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
  2800. def test_out_of_s_bounds_timedelta64(self, constructor, cls):
  2801. scalar = cls(np.iinfo(np.int64).max, "D")
  2802. result = constructor(scalar)
  2803. item = get1(result)
  2804. assert type(item) is cls
  2805. dtype = tm.get_dtype(result)
  2806. assert dtype == object
  2807. def test_tzaware_data_tznaive_dtype(self, constructor, box, frame_or_series):
  2808. tz = "US/Eastern"
  2809. ts = Timestamp("2019", tz=tz)
  2810. if box is None or (frame_or_series is DataFrame and box is dict):
  2811. msg = "Cannot unbox tzaware Timestamp to tznaive dtype"
  2812. err = TypeError
  2813. else:
  2814. msg = (
  2815. "Cannot convert timezone-aware data to timezone-naive dtype. "
  2816. r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
  2817. )
  2818. err = ValueError
  2819. with pytest.raises(err, match=msg):
  2820. constructor(ts, dtype="M8[ns]")
  2821. # TODO: better location for this test?
  2822. class TestAllowNonNano:
  2823. # Until 2.0, we do not preserve non-nano dt64/td64 when passed as ndarray,
  2824. # but do preserve it when passed as DTA/TDA
  2825. @pytest.fixture(params=[True, False])
  2826. def as_td(self, request):
  2827. return request.param
  2828. @pytest.fixture
  2829. def arr(self, as_td):
  2830. values = np.arange(5).astype(np.int64).view("M8[s]")
  2831. if as_td:
  2832. values = values - values[0]
  2833. return TimedeltaArray._simple_new(values, dtype=values.dtype)
  2834. else:
  2835. return DatetimeArray._simple_new(values, dtype=values.dtype)
  2836. def test_index_allow_non_nano(self, arr):
  2837. idx = Index(arr)
  2838. assert idx.dtype == arr.dtype
  2839. def test_dti_tdi_allow_non_nano(self, arr, as_td):
  2840. if as_td:
  2841. idx = pd.TimedeltaIndex(arr)
  2842. else:
  2843. idx = DatetimeIndex(arr)
  2844. assert idx.dtype == arr.dtype
  2845. def test_series_allow_non_nano(self, arr):
  2846. ser = Series(arr)
  2847. assert ser.dtype == arr.dtype
  2848. def test_frame_allow_non_nano(self, arr):
  2849. df = DataFrame(arr)
  2850. assert df.dtypes[0] == arr.dtype
  2851. def test_frame_from_dict_allow_non_nano(self, arr):
  2852. df = DataFrame({0: arr})
  2853. assert df.dtypes[0] == arr.dtype