test_to_datetime.py 144 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926
  1. """ test to_datetime """
  2. import calendar
  3. from collections import deque
  4. from datetime import (
  5. date,
  6. datetime,
  7. timedelta,
  8. timezone,
  9. )
  10. from decimal import Decimal
  11. import locale
  12. from dateutil.parser import parse
  13. from dateutil.tz.tz import tzoffset
  14. import numpy as np
  15. import pytest
  16. import pytz
  17. from pandas._libs import tslib
  18. from pandas._libs.tslibs import (
  19. iNaT,
  20. parsing,
  21. )
  22. from pandas.compat import PY314
  23. from pandas.errors import (
  24. OutOfBoundsDatetime,
  25. OutOfBoundsTimedelta,
  26. )
  27. import pandas.util._test_decorators as td
  28. from pandas.core.dtypes.common import is_datetime64_ns_dtype
  29. import pandas as pd
  30. from pandas import (
  31. DataFrame,
  32. DatetimeIndex,
  33. Index,
  34. NaT,
  35. Series,
  36. Timestamp,
  37. date_range,
  38. isna,
  39. to_datetime,
  40. )
  41. import pandas._testing as tm
  42. from pandas.core.arrays import DatetimeArray
  43. from pandas.core.tools import datetimes as tools
  44. from pandas.core.tools.datetimes import start_caching_at
  # Regex fragment for the trailing "how to fix it" hint; tests in this module
  # interpolate it into the patterns they pass to ``pytest.raises(match=...)``.
  # The pieces are raw strings, so the separator is a literal backslash + "n".
  PARSING_ERR_MSG = r"\n".join(
      [
          r"You might want to try:",
          r" - passing `format` if your strings have a consistent format;",
          (
              r" - passing `format=\'ISO8601\'` if your strings are all ISO8601 "
              r"but not necessarily in exactly the same format;"
          ),
          (
              r" - passing `format=\'mixed\'`, and the format will be inferred "
              r"for each element individually. You might want to use `dayfirst` "
              r"alongside this."
          ),
      ]
  )
  54. if PY314:
  55. NOT_99 = ", not 99"
  56. DAY_IS_OUT_OF_RANGE = (
  57. r"day \d{1,2} must be in range 1\.\.\d{1,2} for month \d{1,2} in year \d{4}"
  58. ", at position 0"
  59. )
  60. else:
  61. NOT_99 = ""
  62. DAY_IS_OUT_OF_RANGE = "day is out of range for month, at position 0"
  # Module-wide mark: ignore the FutureWarning about errors='ignore' being
  # deprecated, since several tests below still pass errors="ignore".
  pytestmark = pytest.mark.filterwarnings(
      "ignore:errors='ignore' is deprecated:FutureWarning",
  )
  @pytest.fixture(params=(True, False))
  def cache(request):
      """Parametrized value for the ``cache`` keyword of to_datetime."""
      return request.param
  72. class TestTimeConversionFormats:
  @pytest.mark.parametrize("readonly", [True, False])
  def test_to_datetime_readonly(self, readonly):
      """An empty object array parses like an empty list, read-only or not."""
      # GH#34857
      expected = to_datetime([])
      empty = np.array([], dtype=object)
      if readonly:
          empty.setflags(write=False)
      tm.assert_index_equal(to_datetime(empty), expected)
  @pytest.mark.parametrize(
      "format, expected",
      [
          (
              "%d/%m/%Y",
              [Timestamp(day) for day in ("20000101", "20000201", "20000301")],
          ),
          (
              "%m/%d/%Y",
              [Timestamp(day) for day in ("20000101", "20000102", "20000103")],
          ),
      ],
  )
  def test_to_datetime_format(self, cache, index_or_series, format, expected):
      """The same strings parse day-first or month-first according to ``format``."""
      raw = index_or_series(["1/1/2000", "1/2/2000", "1/3/2000"])
      parsed = to_datetime(raw, format=format, cache=cache)
      tm.assert_equal(parsed, index_or_series(expected))
  @pytest.mark.parametrize(
      "arg, expected, format",
      [
          ("1/1/2000", "20000101", "%d/%m/%Y"),
          ("1/1/2000", "20000101", "%m/%d/%Y"),
          ("1/2/2000", "20000201", "%d/%m/%Y"),
          ("1/2/2000", "20000102", "%m/%d/%Y"),
          ("1/3/2000", "20000301", "%d/%m/%Y"),
          ("1/3/2000", "20000103", "%m/%d/%Y"),
      ],
  )
  def test_to_datetime_format_scalar(self, cache, arg, expected, format):
      """A scalar string parsed with an explicit format equals the Timestamp."""
      parsed = to_datetime(arg, format=format, cache=cache)
      assert parsed == Timestamp(expected)
  115. def test_to_datetime_format_YYYYMMDD(self, cache):
  116. ser = Series([19801222, 19801222] + [19810105] * 5)
  117. expected = Series([Timestamp(x) for x in ser.apply(str)])
  118. result = to_datetime(ser, format="%Y%m%d", cache=cache)
  119. tm.assert_series_equal(result, expected)
  120. result = to_datetime(ser.apply(str), format="%Y%m%d", cache=cache)
  121. tm.assert_series_equal(result, expected)
  def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
      """Float input with a NaN parses to NaT; its ``str`` form raises."""
      # Explicit cast to float to avoid an implicit cast when setting np.nan
      floats = Series([19801222] * 2 + [19810105] * 5, dtype="float")
      floats[2] = np.nan

      # with NaT
      expected = Series([Timestamp("19801222")] * 2 + [Timestamp("19810105")] * 5)
      expected[2] = np.nan

      parsed = to_datetime(floats, format="%Y%m%d", cache=cache)
      tm.assert_series_equal(parsed, expected)

      # string with NaT
      texts = floats.apply(str)
      texts[2] = "nat"
      msg = (
          'unconverted data remains when parsing with format "%Y%m%d": ".0", '
          "at position 0"
      )
      with pytest.raises(ValueError, match=msg):
          # https://github.com/pandas-dev/pandas/issues/50051
          to_datetime(texts, format="%Y%m%d", cache=cache)
  145. def test_to_datetime_format_YYYYMM_with_nat(self, cache):
  146. # https://github.com/pandas-dev/pandas/issues/50237
  147. # Explicit cast to float to explicit cast when setting np.nan
  148. ser = Series([198012, 198012] + [198101] * 5, dtype="float")
  149. expected = Series(
  150. [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5
  151. )
  152. expected[2] = np.nan
  153. ser[2] = np.nan
  154. result = to_datetime(ser, format="%Y%m", cache=cache)
  155. tm.assert_series_equal(result, expected)
  156. def test_to_datetime_format_YYYYMMDD_ignore(self, cache):
  157. # coercion
  158. # GH 7930, GH 14487
  159. ser = Series([20121231, 20141231, 99991231])
  160. result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache)
  161. expected = Series(
  162. [20121231, 20141231, 99991231],
  163. dtype=object,
  164. )
  165. tm.assert_series_equal(result, expected)
  166. def test_to_datetime_format_YYYYMMDD_ignore_with_outofbounds(self, cache):
  167. # https://github.com/pandas-dev/pandas/issues/26493
  168. result = to_datetime(
  169. ["15010101", "20150101", np.nan],
  170. format="%Y%m%d",
  171. errors="ignore",
  172. cache=cache,
  173. )
  174. expected = Index(["15010101", "20150101", np.nan], dtype=object)
  175. tm.assert_index_equal(result, expected)
  176. def test_to_datetime_format_YYYYMMDD_coercion(self, cache):
  177. # coercion
  178. # GH 7930
  179. ser = Series([20121231, 20141231, 99991231])
  180. result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache)
  181. expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]")
  182. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "input_s",
      [
          # Null values with Strings
          ["19801222", "20010112", None],
          ["19801222", "20010112", np.nan],
          ["19801222", "20010112", NaT],
          ["19801222", "20010112", "NaT"],
          # Null values with Integers
          [19801222, 20010112, None],
          [19801222, 20010112, np.nan],
          [19801222, 20010112, NaT],
          [19801222, 20010112, "NaT"],
      ],
  )
  def test_to_datetime_format_YYYYMMDD_with_none(self, input_s):
      """Each null marker in the last position parses to NaT under %Y%m%d."""
      # GH 30011
      # format='%Y%m%d'
      # with None
      parsed = Series(to_datetime(input_s, format="%Y%m%d"))
      expected = Series([Timestamp("19801222"), Timestamp("20010112"), NaT])
      tm.assert_series_equal(parsed, expected)
  @pytest.mark.parametrize(
      "input_s, expected",
      [
          # NaN before strings with invalid date values
          (
              Series(["19801222", np.nan, "20010012", "10019999"]),
              Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
          ),
          # NaN after strings with invalid date values
          (
              Series(["19801222", "20010012", "10019999", np.nan]),
              Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
          ),
          # NaN before integers with invalid date values
          (
              Series([20190813, np.nan, 20010012, 20019999]),
              Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
          ),
          # NaN after integers with invalid date values
          (
              Series([20190813, 20010012, np.nan, 20019999]),
              Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
          ),
      ],
  )
  def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
      """Invalid dates and NaN are all coerced to missing values."""
      # GH 25512
      # format='%Y%m%d', errors='coerce'
      coerced = to_datetime(input_s, format="%Y%m%d", errors="coerce")
      tm.assert_series_equal(coerced, expected)
  @pytest.mark.parametrize(
      "data, format, expected",
      [
          ([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
          ([pd.NA], None, DatetimeIndex(["NaT"])),
          (
              [pd.NA, "20210202202020"],
              "%Y%m%d%H%M%S",
              DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
          ),
          (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
          (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
          ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT"] * 3)),
          ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT"] * 3)),
      ],
  )
  def test_to_datetime_with_NA(self, data, format, expected):
      """pd.NA (alone or mixed with other nulls/strings) parses to NaT."""
      # GH#42957
      tm.assert_index_equal(to_datetime(data, format=format), expected)
  255. def test_to_datetime_with_NA_with_warning(self):
  256. # GH#42957
  257. result = to_datetime(["201010", pd.NA])
  258. expected = DatetimeIndex(["2010-10-20", "NaT"])
  259. tm.assert_index_equal(result, expected)
  260. def test_to_datetime_format_integer(self, cache):
  261. # GH 10178
  262. ser = Series([2000, 2001, 2002])
  263. expected = Series([Timestamp(x) for x in ser.apply(str)])
  264. result = to_datetime(ser, format="%Y", cache=cache)
  265. tm.assert_series_equal(result, expected)
  266. ser = Series([200001, 200105, 200206])
  267. expected = Series([Timestamp(x[:4] + "-" + x[4:]) for x in ser.apply(str)])
  268. result = to_datetime(ser, format="%Y%m", cache=cache)
  269. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "int_date, expected",
      [
          # valid date, length == 8
          (20121030, datetime(2012, 10, 30)),
          # short valid date, length == 6
          (199934, datetime(1999, 3, 4)),
          # long integer date partially parsed to datetime(2012,1,1), length > 8
          (2012010101, 2012010101),
          # invalid date partially parsed to datetime(2012,9,9), length == 8
          (20129930, 20129930),
          # short integer date partially parsed to datetime(2012,9,9), length < 8
          (2012993, 2012993),
          # short invalid date, length == 4
          (2121, 2121),
      ],
  )
  def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected):
      """With errors="ignore", unparseable integers are returned unchanged."""
      # GH 26583
      outcome = to_datetime(int_date, format="%Y%m%d", errors="ignore")
      assert outcome == expected
  291. def test_to_datetime_format_microsecond(self, cache):
  292. month_abbr = calendar.month_abbr[4]
  293. val = f"01-{month_abbr}-2011 00:00:01.978"
  294. format = "%d-%b-%Y %H:%M:%S.%f"
  295. result = to_datetime(val, format=format, cache=cache)
  296. exp = datetime.strptime(val, format)
  297. assert result == exp
  @pytest.mark.parametrize(
      "value, format, dt",
      [
          ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")],
          ["01/10/2010 05:43", "%m/%d/%Y %I:%M", Timestamp("2010-01-10 05:43")],
          [
              "01/10/2010 13:56:01",
              "%m/%d/%Y %H:%M:%S",
              Timestamp("2010-01-10 13:56:01"),
          ],
          # The 3 tests below are locale-dependent.
          # They pass, except when the machine locale is zh_CN or it_IT .
          *[
              pytest.param(
                  text,
                  pattern,
                  Timestamp(stamp),
                  marks=pytest.mark.xfail(
                      locale.getlocale()[0] in ("zh_CN", "it_IT"),
                      reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",
                      strict=False,
                  ),
              )
              for text, pattern, stamp in [
                  ("01/10/2010 08:14 PM", "%m/%d/%Y %I:%M %p", "2010-01-10 20:14"),
                  ("01/10/2010 07:40 AM", "%m/%d/%Y %I:%M %p", "2010-01-10 07:40"),
                  (
                      "01/10/2010 09:12:56 AM",
                      "%m/%d/%Y %I:%M:%S %p",
                      "2010-01-10 09:12:56",
                  ),
              ]
          ],
      ],
  )
  def test_to_datetime_format_time(self, cache, value, format, dt):
      """Date-time strings with 24-hour and AM/PM times parse as expected."""
      parsed = to_datetime(value, format=format, cache=cache)
      assert parsed == dt
  @td.skip_if_not_us_locale
  def test_to_datetime_with_non_exact(self, cache):
      """exact=False gives the same result as parsing the extracted date token."""
      # GH 10834
      # 8904
      # exact kw
      raw = Series(
          ["19MAY11", "foobar19MAY11", "19MAY11:00:00:00", "19MAY11 00:00:00Z"]
      )
      # Pull out the digits-word-digits token and parse it exactly.
      tokens = raw.str.extract(r"(\d+\w+\d+)", expand=False)
      expected = to_datetime(tokens, format="%d%b%y", cache=cache)
      result = to_datetime(raw, format="%d%b%y", exact=False, cache=cache)
      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "format, expected",
      [
          ("%Y-%m-%d", Timestamp(2000, 1, 3)),
          ("%Y-%d-%m", Timestamp(2000, 3, 1)),
          ("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)),
          ("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)),
          ("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)),
          ("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)),
          ("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)),
          ("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 34, 56)),
          ("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)),
          ("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)),
          (
              "%Y-%m-%d %H:%M:%S.%f%z",
              Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"),
          ),
          (
              "%Y-%d-%m %H:%M:%S.%f%z",
              Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"),
          ),
      ],
  )
  def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected):
      """With exact=False only the part of the string the format covers is used."""
      # https://github.com/pandas-dev/pandas/issues/50412
      # the formats alternate between ISO8601 and non-ISO8601 to check both paths
      text = "2000-01-03 12:34:56.123456+01:00"
      parsed = to_datetime(text, format=format, exact=False)
      assert parsed == expected
  @pytest.mark.parametrize(
      "arg",
      [
          "2012-01-01 09:00:00.000000001",
          "2012-01-01 09:00:00.000001",
          "2012-01-01 09:00:00.001",
          "2012-01-01 09:00:00.001000",
          "2012-01-01 09:00:00.001000000",
      ],
  )
  def test_parse_nanoseconds_with_formula(self, cache, arg):
      """Parsing with an explicit %f format matches parsing without a format."""
      # GH8989
      # truncating the nanoseconds when a format was provided
      with_format = to_datetime(arg, format="%Y-%m-%d %H:%M:%S.%f", cache=cache)
      without_format = to_datetime(arg, cache=cache)
      assert with_format == without_format
  @pytest.mark.parametrize(
      "value,fmt,expected",
      [
          ("2009324", "%Y%W%w", Timestamp("2009-08-13")),
          ("2013020", "%Y%U%w", Timestamp("2013-01-13")),
      ],
  )
  def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
      """Year/week-number/weekday strings parse with %W and with %U."""
      parsed = to_datetime(value, format=fmt, cache=cache)
      assert parsed == expected
  @pytest.mark.parametrize(
      "fmt,dates,expected_dates",
      [
          (
              "%Y-%m-%d %H:%M:%S %Z",
              ["2010-01-01 12:00:00 UTC"] * 2,
              [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2,
          ),
          (
              "%Y-%m-%d %H:%M:%S%z",
              ["2010-01-01 12:00:00+0100"] * 2,
              [
                  Timestamp(
                      "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
                  )
              ]
              * 2,
          ),
          (
              "%Y-%m-%d %H:%M:%S %z",
              ["2010-01-01 12:00:00 +0100"] * 2,
              [
                  Timestamp(
                      "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
                  )
              ]
              * 2,
          ),
          (
              "%Y-%m-%d %H:%M:%S %z",
              ["2010-01-01 12:00:00 Z"] * 2,
              # pytz coerces to UTC
              [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0))] * 2,
          ),
      ],
  )
  def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates):
      """%Z names and %z offsets yield timezone-aware results."""
      # GH 13486
      parsed = to_datetime(dates, format=fmt)
      tm.assert_equal(parsed, Index(expected_dates))
  @pytest.mark.parametrize(
      "fmt,dates,expected_dates",
      [
          (
              "%Y-%m-%d %H:%M:%S %Z",
              [
                  "2010-01-01 12:00:00 UTC",
                  "2010-01-01 12:00:00 GMT",
                  "2010-01-01 12:00:00 US/Pacific",
              ],
              [
                  Timestamp("2010-01-01 12:00:00", tz="UTC"),
                  Timestamp("2010-01-01 12:00:00", tz="GMT"),
                  Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
              ],
          ),
          (
              "%Y-%m-%d %H:%M:%S %z",
              ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
              [
                  Timestamp(
                      "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
                  ),
                  Timestamp(
                      "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
                  ),
              ],
          ),
      ],
  )
  def test_to_datetime_parse_tzname_or_tzoffset_utc_false_deprecated(
      self, fmt, dates, expected_dates
  ):
      """Mixed time zones without utc=True emit the FutureWarning."""
      # GH 13486, 50887
      warning_text = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warning_text):
          parsed = to_datetime(dates, format=fmt)
      tm.assert_equal(parsed, Index(expected_dates))
  def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self):
      """With ``utc=True``, strings with differing offsets parse to UTC values."""
      # GH 32792
      # Maps each input string to the UTC wall time it is expected to become.
      input_to_utc = {
          "2010-01-01 12:00:00 +0100": "2010-01-01 11:00:00+00:00",
          "2010-01-01 12:00:00 -0100": "2010-01-01 13:00:00+00:00",
          "2010-01-01 12:00:00 +0300": "2010-01-01 09:00:00+00:00",
          "2010-01-01 12:00:00 +0400": "2010-01-01 08:00:00+00:00",
      }
      parsed = to_datetime(
          list(input_to_utc), format="%Y-%m-%d %H:%M:%S %z", utc=True
      )
      tm.assert_index_equal(parsed, DatetimeIndex(list(input_to_utc.values())))
  @pytest.mark.parametrize(
      "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""]
  )
  def test_to_datetime_parse_timezone_malformed(self, offset):
      """A malformed ``%z`` offset makes ``to_datetime`` raise ValueError.

      Either of two error messages is accepted: the string not matching
      the format, or unconverted data remaining after the match.
      """
      no_match = (
          r'^time data ".*" doesn\'t match format ".*", at position 0. '
          f"{PARSING_ERR_MSG}$"
      )
      leftover = (
          r'^unconverted data remains when parsing with format ".*": ".*", '
          f"at position 0. {PARSING_ERR_MSG}$"
      )
      date = f"2010-01-01 12:00:00 {offset}"
      with pytest.raises(ValueError, match=f"{no_match}|{leftover}"):
          to_datetime([date], format="%Y-%m-%d %H:%M:%S %z")
  def test_to_datetime_parse_timezone_keeps_name(self):
      """The name of the input Index is carried over to the parsed result."""
      # GH 21697
      named_input = Index(["2010-01-01 12:00:00 Z"], name="foo")
      parsed = to_datetime(named_input, format="%Y-%m-%d %H:%M:%S %z")
      tm.assert_index_equal(
          parsed,
          DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo"),
      )
  537. class TestToDatetime:
  @pytest.mark.filterwarnings("ignore:Could not infer format")
  def test_to_datetime_overflow(self):
      """An overflowing numeric string raises, coerces to NaT, or is returned.

      Checks scalar and list inputs under each ``errors`` mode.
      """
      # we should get an OutOfBoundsDatetime, NOT OverflowError
      # TODO: Timestamp raises ValueError("could not convert string to Timestamp")
      #  can we make these more consistent?
      arg = "08335394550"
      msg = 'Parsing "08335394550" to datetime overflows, at position 0'

      # errors="raise" (the default): scalar first, then list-like
      for overflowing in (arg, [arg]):
          with pytest.raises(OutOfBoundsDatetime, match=msg):
              to_datetime(overflowing)

      coerced_scalar = to_datetime(arg, errors="coerce")
      assert coerced_scalar is NaT
      coerced_index = to_datetime([arg], errors="coerce")
      tm.assert_index_equal(coerced_index, Index([NaT]))

      ignored_scalar = to_datetime(arg, errors="ignore")
      assert isinstance(ignored_scalar, str) and ignored_scalar == arg
      ignored_index = to_datetime([arg], errors="ignore")
      tm.assert_index_equal(ignored_index, Index([arg], dtype=object))
  def test_to_datetime_mixed_datetime_and_string(self):
      """A tz-aware string mixed with a tz-aware datetime parses to one index."""
      # GH#47018 adapted old doctest with new behavior
      minus_one_hour = timezone(-timedelta(hours=1))
      d1 = datetime(2020, 1, 1, 17, tzinfo=minus_one_hour)
      d2 = datetime(2020, 1, 1, 18, tzinfo=minus_one_hour)

      parsed = to_datetime(["2020-01-01 17:00 -0100", d2])

      all_datetimes = to_datetime([d1, d2])
      expected = all_datetimes.tz_convert(timezone(timedelta(minutes=-60)))
      tm.assert_index_equal(parsed, expected)
  def test_to_datetime_mixed_string_and_numeric(self):
      """A string and a number in one list parse the same in either order.

      Covers both ``to_datetime(..., format="mixed")`` and the
      ``DatetimeIndex`` constructor.
      """
      # GH#55780 np.array(vals) would incorrectly cast the number to str
      vals = ["2016-01-01", 0]
      flipped = vals[::-1]
      expected = DatetimeIndex([Timestamp(x) for x in vals])

      outcomes = [
          to_datetime(vals, format="mixed"),
          to_datetime(flipped, format="mixed")[::-1],
          DatetimeIndex(vals),
          DatetimeIndex(flipped)[::-1],
      ]
      for outcome in outcomes:
          tm.assert_index_equal(outcome, expected)
  @pytest.mark.parametrize(
      "format", ["%Y-%m-%d", "%Y-%d-%m"], ids=["ISO8601", "non-ISO8601"]
  )
  def test_to_datetime_mixed_date_and_string(self, format):
      """A ``datetime.date`` mixed with a string parses under either format."""
      # https://github.com/pandas-dev/pandas/issues/50108
      second_of_january = date(2020, 1, 2)
      parsed = to_datetime(["2020-01-01", second_of_january], format=format)
      tm.assert_index_equal(
          parsed,
          DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[ns]"),
      )
  @pytest.mark.parametrize(
      "fmt",
      ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
      ids=["non-ISO8601 format", "ISO8601 format"],
  )
  @pytest.mark.parametrize(
      ("utc", "args", "expected"),
      [
          pytest.param(
              True,
              ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
              DatetimeIndex(
                  ["2000-01-01 09:00:00+00:00", "2000-01-01 10:00:00+00:00"],
                  dtype="datetime64[ns, UTC]",
              ),
              id="all tz-aware, with utc",
          ),
          pytest.param(
              False,
              ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
              DatetimeIndex(
                  ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
              ),
              id="all tz-aware, without utc",
          ),
          pytest.param(
              True,
              ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00+00:00"],
              DatetimeIndex(
                  ["2000-01-01 09:00:00+00:00", "2000-01-01 02:00:00+00:00"],
                  dtype="datetime64[ns, UTC]",
              ),
              id="all tz-aware, mixed offsets, with utc",
          ),
          pytest.param(
              True,
              ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"],
              DatetimeIndex(
                  ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
                  dtype="datetime64[ns, UTC]",
              ),
              id="tz-aware string, naive pydatetime, with utc",
          ),
      ],
  )
  @pytest.mark.parametrize(
      "constructor",
      [Timestamp, lambda x: Timestamp(x).to_pydatetime()],
  )
  def test_to_datetime_mixed_datetime_and_string_with_format(
      self, fmt, utc, args, expected, constructor
  ):
      """Mix a datetime-like object and a string while passing ``format``.

      The first element is built with ``constructor``; the second stays a
      string.
      """
      # https://github.com/pandas-dev/pandas/issues/49298
      # https://github.com/pandas-dev/pandas/issues/50254
      # note: ISO8601 formats go down a fastpath, so we need to check both
      # a ISO8601 format and a non-ISO8601 one
      first_raw, second_raw = args[0], args[1]
      mixed_input = [constructor(first_raw), second_raw]
      parsed = to_datetime(mixed_input, format=fmt, utc=utc)
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      "fmt",
      ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
      ids=["non-ISO8601 format", "ISO8601 format"],
  )
  @pytest.mark.parametrize(
      "constructor",
      [Timestamp, lambda x: Timestamp(x).to_pydatetime()],
  )
  def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false(
      self, fmt, constructor
  ):
      """Naive datetime-like plus tz-aware string with ``utc=False`` warns.

      A FutureWarning about mixed time zones is expected, and the result is
      compared with an Index holding one naive and one UTC Timestamp.
      """
      # https://github.com/pandas-dev/pandas/issues/49298
      # https://github.com/pandas-dev/pandas/issues/50254
      # note: ISO8601 formats go down a fastpath, so we need to check both
      # a ISO8601 format and a non-ISO8601 one
      naive_str = "2000-01-01 01:00:00"
      aware_str = "2000-01-01 02:00:00+00:00"
      mixed_input = [constructor(naive_str), aware_str]

      expected = Index(
          [
              Timestamp("2000-01-01 01:00:00"),
              Timestamp("2000-01-01 02:00:00+0000", tz="UTC"),
          ],
      )
      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          parsed = to_datetime(mixed_input, format=fmt, utc=False)
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      ("fmt", "expected"),
      [
          pytest.param(
              "%Y-%m-%d %H:%M:%S%z",
              Index(
                  [
                      Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
                      Timestamp("2000-01-02 02:00:00+0200", tz="UTC+02:00"),
                      NaT,
                  ]
              ),
              id="ISO8601, non-UTC",
          ),
          pytest.param(
              "%Y-%d-%m %H:%M:%S%z",
              Index(
                  [
                      Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
                      Timestamp("2000-02-01 02:00:00+0200", tz="UTC+02:00"),
                      NaT,
                  ]
              ),
              id="non-ISO8601, non-UTC",
          ),
      ],
  )
  def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected):
      """Mixed offsets plus ``None`` with ``utc=False`` warn and keep offsets."""
      # https://github.com/pandas-dev/pandas/issues/50071
      raw_values = ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None]
      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          parsed = to_datetime(raw_values, format=fmt, utc=False)
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      ("fmt", "expected"),
      [
          pytest.param(
              "%Y-%m-%d %H:%M:%S%z",
              DatetimeIndex(
                  ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"],
                  dtype="datetime64[ns, UTC]",
              ),
              id="ISO8601, UTC",
          ),
          pytest.param(
              "%Y-%d-%m %H:%M:%S%z",
              DatetimeIndex(
                  ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"],
                  dtype="datetime64[ns, UTC]",
              ),
              id="non-ISO8601, UTC",
          ),
      ],
  )
  def test_to_datetime_mixed_offsets_with_none(self, fmt, expected):
      """Mixed offsets plus ``None`` with ``utc=True`` give a UTC index."""
      # https://github.com/pandas-dev/pandas/issues/50071
      raw_values = ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None]
      parsed = to_datetime(raw_values, format=fmt, utc=True)
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      "fmt",
      ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
      ids=["non-ISO8601 format", "ISO8601 format"],
  )
  @pytest.mark.parametrize(
      "args",
      [
          pytest.param(
              ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-07:00"],
              id="all tz-aware, mixed timezones, without utc",
          ),
      ],
  )
  @pytest.mark.parametrize(
      "constructor",
      [Timestamp, lambda x: Timestamp(x).to_pydatetime()],
  )
  def test_to_datetime_mixed_datetime_and_string_with_format_raises(
      self, fmt, args, constructor
  ):
      """Two datetime-likes with different offsets and ``utc=False`` raise."""
      # https://github.com/pandas-dev/pandas/issues/49298
      # note: ISO8601 formats go down a fastpath, so we need to check both
      # a ISO8601 format and a non-ISO8601 one
      first, second = constructor(args[0]), constructor(args[1])
      expected_err = "cannot be converted to datetime64 unless utc=True"
      with pytest.raises(ValueError, match=expected_err):
          to_datetime([first, second], format=fmt, utc=False)
  def test_to_datetime_np_str(self):
      """``np.str_`` input parses as scalar, list element, and Series element."""
      # GH#32264
      # GH#48969
      value = np.str_("2019-02-04 10:18:46.297000+0000")
      wrapped = Series([value])
      target = Timestamp("2019-02-04 10:18:46.297000", tz="UTC")

      # scalar paths
      assert to_datetime(value) == target
      assert to_datetime(wrapped.iloc[0]) == target

      # list-like path
      expected_index = Index([target])
      tm.assert_index_equal(to_datetime([value]), expected_index)

      # Series path
      tm.assert_series_equal(to_datetime(wrapped), Series(expected_index))
  @pytest.mark.parametrize(
      ("s", "_format", "dt"),
      [
          ("2015-1-1", "%G-%V-%u", datetime(2014, 12, 29, 0, 0)),
          ("2015-1-4", "%G-%V-%u", datetime(2015, 1, 1, 0, 0)),
          ("2015-1-7", "%G-%V-%u", datetime(2015, 1, 4, 0, 0)),
      ],
  )
  def test_to_datetime_iso_week_year_format(self, s, _format, dt):
      """ISO year/week/weekday strings parse to the expected calendar date."""
      # See GH#16607
      parsed = to_datetime(s, format=_format)
      assert parsed == dt
  @pytest.mark.parametrize(
      ("msg", "s", "_format"),
      [
          (
              "ISO week directive '%V' is incompatible with the year directive "
              "'%Y'. Use the ISO year '%G' instead.",
              "1999 50",
              "%Y %V",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 51",
              "%G %V",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 Monday",
              "%G %A",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 Mon",
              "%G %a",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 6",
              "%G %w",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 6",
              "%G %u",
          ),
          (
              "ISO year directive '%G' must be used with the ISO week directive "
              "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "2051",
              "%G",
          ),
          (
              "Day of the year directive '%j' is not compatible with ISO year "
              "directive '%G'. Use '%Y' instead.",
              "1999 51 6 256",
              "%G %V %u %j",
          ),
          (
              "ISO week directive '%V' is incompatible with the year directive "
              "'%Y'. Use the ISO year '%G' instead.",
              "1999 51 Sunday",
              "%Y %V %A",
          ),
          (
              "ISO week directive '%V' is incompatible with the year directive "
              "'%Y'. Use the ISO year '%G' instead.",
              "1999 51 Sun",
              "%Y %V %a",
          ),
          (
              "ISO week directive '%V' is incompatible with the year directive "
              "'%Y'. Use the ISO year '%G' instead.",
              "1999 51 1",
              "%Y %V %w",
          ),
          (
              "ISO week directive '%V' is incompatible with the year directive "
              "'%Y'. Use the ISO year '%G' instead.",
              "1999 51 1",
              "%Y %V %u",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "20",
              "%V",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 51 Sunday",
              "%V %A",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 51 Sun",
              "%V %a",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 51 1",
              "%V %w",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "1999 51 1",
              "%V %u",
          ),
          (
              "Day of the year directive '%j' is not compatible with ISO year "
              "directive '%G'. Use '%Y' instead.",
              "1999 50",
              "%G %j",
          ),
          (
              "ISO week directive '%V' must be used with the ISO year directive "
              "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
              "20 Monday",
              "%V %A",
          ),
      ],
  )
  @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  def test_error_iso_week_year(self, msg, s, _format, errors):
      """Invalid ISO year/week directive combinations raise ValueError.

      The error is expected for every ``errors`` mode. Nothing is checked
      when the current locale is zh_CN or it_IT (UTF-8).
      """
      # See GH#16607, GH#50308
      # This test checks for errors thrown when giving the wrong format
      # However, as discussed on PR#25541, overriding the locale
      # causes a different error to be thrown due to the format being
      # locale specific, but the test data is in english.
      # Therefore, the tests only run when locale is not overwritten,
      # as a sort of solution to this problem.
      overridden_locales = (("zh_CN", "UTF-8"), ("it_IT", "UTF-8"))
      if locale.getlocale() in overridden_locales:
          return

      with pytest.raises(ValueError, match=msg):
          to_datetime(s, format=_format, errors=errors)
  @pytest.mark.parametrize("tz", [None, "US/Central"])
  def test_to_datetime_dtarr(self, tz):
      """Passing a DatetimeArray returns that very same object."""
      # DatetimeArray
      index = date_range("1965-04-03", periods=19, freq="2W", tz=tz)
      dt_array = index._data

      assert to_datetime(dt_array) is dt_array
  # Doesn't work on Windows since tzpath not set correctly
  @td.skip_if_windows
  @pytest.mark.parametrize("arg_class", [Series, Index])
  @pytest.mark.parametrize("utc", [True, False])
  @pytest.mark.parametrize("tz", [None, "US/Central"])
  def test_to_datetime_arrow(self, tz, utc, arg_class):
      """Arrow-backed timestamp data round-trips through ``to_datetime``."""
      pa = pytest.importorskip("pyarrow")

      dti = arg_class(date_range("1965-04-03", periods=19, freq="2W", tz=tz))
      dti_arrow = dti.astype(pd.ArrowDtype(pa.timestamp(unit="ns", tz=tz)))

      result = to_datetime(dti_arrow, utc=utc)

      expected_tz = "UTC" if utc else tz
      expected = to_datetime(dti, utc=utc).astype(
          pd.ArrowDtype(pa.timestamp(unit="ns", tz=expected_tz))
      )

      if arg_class is not Series and not utc:
          # Doesn't hold for utc=True, since that will astype
          # to_datetime also returns a new object for series
          assert result is dti_arrow

      compare = (
          tm.assert_series_equal if arg_class is Series else tm.assert_index_equal
      )
      compare(result, expected)
  def test_to_datetime_pydatetime(self):
      """A ``datetime.datetime`` input compares equal to the parsed result."""
      source = datetime(2008, 1, 15)
      assert to_datetime(source) == datetime(2008, 1, 15)
  def test_to_datetime_YYYYMMDD(self):
      """A compact ``YYYYMMDD`` string parses to the matching date."""
      parsed = to_datetime("20080115")
      assert parsed == datetime(2008, 1, 15)
  def test_to_datetime_unparsable_ignore(self):
      """With ``errors="ignore"`` an unparsable string is returned unchanged."""
      # unparsable
      unparsable = "Month 1, 1999"
      returned = to_datetime(unparsable, errors="ignore")
      assert returned == unparsable
  @td.skip_if_windows  # `tm.set_timezone` does not work in windows
  def test_to_datetime_now(self):
      """``to_datetime("now")`` is close to ``Timestamp("now")`` and tz-naive."""
      # See GH#18666
      with tm.set_timezone("US/Eastern"):
          # GH#18705
          reference = Timestamp("now").as_unit("ns")
          scalar_now = to_datetime("now")
          listlike_now = to_datetime(["now"])[0]

      # These should all be equal with infinite perf; this gives
      # a generous margin of 10 seconds
      for candidate in (scalar_now, listlike_now):
          assert abs(candidate._value - reference._value) < 1e10

      for candidate in (scalar_now, listlike_now):
          assert candidate.tzinfo is None
  @td.skip_if_windows  # `tm.set_timezone` does not work in windows
  @pytest.mark.parametrize("tz", ["Pacific/Auckland", "US/Samoa"])
  def test_to_datetime_today(self, tz):
      """``to_datetime("today")`` agrees with numpy and Timestamp "today"."""
      # See GH#18666
      # Test with one timezone far ahead of UTC and another far behind, so
      # one of these will _almost_ always be in a different day from UTC.
      # Unfortunately, between 12 and 1 AM Samoa time both of these
      # timezones _and_ UTC will all be in the same day, so during that hour
      # this test will not detect the regression introduced in #18666.
      with tm.set_timezone(tz):
          np_today = (
              np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
          )
          scalar_today = to_datetime("today")
          listlike_today = to_datetime(["today"])[0]

          ts_from_str = Timestamp("today").as_unit("ns")
          ts_from_method = Timestamp.today().as_unit("ns")

          # These should all be equal with infinite perf; this gives
          # a generous margin of 10 seconds
          assert abs(scalar_today.normalize()._value - np_today) < 1e10
          assert abs(listlike_today.normalize()._value - np_today) < 1e10
          assert abs(scalar_today._value - ts_from_str._value) < 1e10
          assert abs(scalar_today._value - ts_from_method._value) < 1e10

          assert scalar_today.tzinfo is None
          assert listlike_today.tzinfo is None
  @pytest.mark.parametrize("arg", ["now", "today"])
  def test_to_datetime_today_now_unicode_bytes(self, arg):
      """Smoke test: a one-element list holding ``arg`` parses without error."""
      to_datetime([arg])
  @pytest.mark.parametrize(
      ("format", "expected_ds"),
      [
          ("%Y-%m-%d %H:%M:%S%z", "2020-01-03"),
          ("%Y-%d-%m %H:%M:%S%z", "2020-03-01"),
          (None, "2020-01-03"),
      ],
  )
  @pytest.mark.parametrize(
      ("string", "attribute"),
      [
          ("now", "utcnow"),
          ("today", "today"),
      ],
  )
  def test_to_datetime_now_with_format(self, format, expected_ds, string, attribute):
      """"now"/"today" are accepted next to a dated string when format is set.

      The parsed value must be within one second of the matching
      ``Timestamp`` constructor (``utcnow`` or ``today``).
      """
      # https://github.com/pandas-dev/pandas/issues/50359
      parsed = to_datetime(["2020-01-03 00:00:00Z", string], format=format, utc=True)
      current = getattr(Timestamp, attribute)()
      expected = DatetimeIndex([expected_ds, current], dtype="datetime64[ns, UTC]")
      largest_gap = (expected - parsed).max()
      assert largest_gap.total_seconds() < 1
  @pytest.mark.parametrize(
      "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")]
  )
  def test_to_datetime_dt64s(self, cache, dt):
      """A ``np.datetime64`` scalar parses to the equivalent Timestamp."""
      parsed = to_datetime(dt, cache=cache)
      assert parsed == Timestamp(dt)
  @pytest.mark.parametrize(
      ("arg", "format"),
      [
          ("2001-01-01", "%Y-%m-%d"),
          ("01-01-2001", "%d-%m-%Y"),
      ],
  )
  def test_to_datetime_dt64s_and_str(self, arg, format):
      """A string and a ``np.datetime64`` in one list parse with ``format``."""
      # https://github.com/pandas-dev/pandas/issues/50036
      mixed_input = [arg, np.datetime64("2020-01-01")]
      parsed = to_datetime(mixed_input, format=format)
      tm.assert_index_equal(parsed, DatetimeIndex(["2001-01-01", "2020-01-01"]))
  @pytest.mark.parametrize(
      "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
  )
  @pytest.mark.parametrize("errors", ["raise", "ignore", "coerce"])
  def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors):
      """datetime64 values outside the ns range come back with unit "s"."""
      # GH#50369 We cast to the nearest supported reso, i.e. "s"
      via_to_datetime = to_datetime(dt, errors=errors, cache=cache)
      assert isinstance(via_to_datetime, Timestamp)
      assert via_to_datetime.unit == "s"
      assert via_to_datetime.asm8 == dt

      # The Timestamp constructor must agree with to_datetime.
      via_constructor = Timestamp(dt)
      assert via_constructor.unit == "s"
      assert via_constructor.asm8 == dt
  @pytest.mark.skip_ubsan
  def test_to_datetime_dt64d_out_of_bounds(self, cache):
      """The largest day-unit datetime64 raises, or coerces to NaT."""
      huge_day = np.datetime64(np.iinfo(np.int64).max, "D")
      msg = "Out of bounds second timestamp: 25252734927768524-07-27"

      with pytest.raises(OutOfBoundsDatetime, match=msg):
          Timestamp(huge_day)
      with pytest.raises(OutOfBoundsDatetime, match=msg):
          to_datetime(huge_day, errors="raise", cache=cache)

      coerced = to_datetime(huge_day, errors="coerce", cache=cache)
      assert coerced is NaT
  @pytest.mark.parametrize("unit", ["s", "D"])
  def test_to_datetime_array_of_dt64s(self, cache, unit):
      """Lists of ``np.datetime64`` parse; an out-of-bounds tail is tolerated.

      Several expectations currently differ between ``cache=True`` and
      ``cache=False``; see the FIXME notes below.
      """
      # https://github.com/pandas-dev/pandas/issues/31491
      # Need at least 50 to ensure cache is used.
      dts = [
          np.datetime64("2000-01-01", unit),
          np.datetime64("2000-01-02", unit),
      ] * 30

      # Assuming all datetimes are in bounds, to_datetime() returns
      # an array that is equal to Timestamp() parsing
      result = to_datetime(dts, cache=cache)
      # FIXME: behavior should not depend on cache
      in_bounds_dtype = "M8[s]" if cache else "M8[ns]"
      expected = DatetimeIndex(
          [Timestamp(x).asm8 for x in dts], dtype=in_bounds_dtype
      )
      tm.assert_index_equal(result, expected)

      # A list of datetimes where the last one is out of bounds
      dts_with_oob = dts + [np.datetime64("9999-01-01")]

      # As of GH#51978 we do not raise in this case
      to_datetime(dts_with_oob, errors="raise")

      result = to_datetime(dts_with_oob, errors="coerce", cache=cache)
      if cache:
          expected = DatetimeIndex(np.array(dts_with_oob, dtype="M8[s]"))
      else:
          # FIXME: shouldn't depend on cache!
          expected = DatetimeIndex(
              [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30
              + [NaT],
          )
      tm.assert_index_equal(result, expected)

      # With errors='ignore', out of bounds datetime64s
      # are converted to their .item(), which depending on the version of
      # numpy is either a python datetime.datetime or datetime.date
      result = to_datetime(dts_with_oob, errors="ignore", cache=cache)
      if not cache:
          # FIXME: shouldn't depend on cache!
          expected = Index(dts_with_oob)
      # With cache=True the expectation from the "coerce" step is reused.
      tm.assert_index_equal(result, expected)
  def test_out_of_bounds_errors_ignore(self):
      """``errors="ignore"`` on an out-of-ns-bounds datetime64 keeps its value."""
      # https://github.com/pandas-dev/pandas/issues/50587
      far_future = np.datetime64("9999-01-01")
      returned = to_datetime(far_future, errors="ignore")
      assert returned == np.datetime64("9999-01-01")
  def test_out_of_bounds_errors_ignore2(self):
      """``errors="ignore"`` warns and leaves an out-of-bounds Series as is."""
      # GH#12424
      raw = Series(["2362-01-01", np.nan])
      depr_msg = "errors='ignore' is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=depr_msg):
          returned = to_datetime(raw, errors="ignore")
      tm.assert_series_equal(returned, Series(["2362-01-01", np.nan]))
  def test_to_datetime_tz(self, cache):
      """Timestamps sharing one time zone parse to a tz-aware DatetimeIndex."""
      # xref 8260
      # uniform returns a DatetimeIndex
      pacific_stamps = [
          Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
          Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"),
      ]
      parsed = to_datetime(pacific_stamps, cache=cache)
      tm.assert_index_equal(
          parsed,
          DatetimeIndex(
              ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific"
          ),
      )
  def test_to_datetime_tz_mixed(self, cache):
      """Timestamps in two different zones: raise, ignore, and coerce modes."""
      # mixed tzs will raise if errors='raise'
      # https://github.com/pandas-dev/pandas/issues/50585
      mixed_zone_stamps = [
          Timestamp("2013-01-01 13:00:00", tz="US/Pacific"),
          Timestamp("2013-01-02 14:00:00", tz="US/Eastern"),
      ]

      # errors="raise" (default)
      raise_msg = (
          "Tz-aware datetime.datetime cannot be "
          "converted to datetime64 unless utc=True"
      )
      with pytest.raises(ValueError, match=raise_msg):
          to_datetime(mixed_zone_stamps, cache=cache)

      # errors="ignore": deprecated, returns an object-dtype Index
      with tm.assert_produces_warning(
          FutureWarning, match="errors='ignore' is deprecated"
      ):
          ignored = to_datetime(mixed_zone_stamps, cache=cache, errors="ignore")
      expected_ignored = Index(
          [
              Timestamp("2013-01-01 13:00:00-08:00"),
              Timestamp("2013-01-02 14:00:00-05:00"),
          ],
          dtype="object",
      )
      tm.assert_index_equal(ignored, expected_ignored)

      # errors="coerce": second element becomes NaT
      coerced = to_datetime(mixed_zone_stamps, cache=cache, errors="coerce")
      expected_coerced = DatetimeIndex(
          ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]"
      )
      tm.assert_index_equal(coerced, expected_coerced)
  def test_to_datetime_different_offsets(self, cache):
      """Strings with two different offsets warn and match ``parse`` output."""
      # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
      # see GH-26097 for more
      plus_four = "March 1, 2018 12:00:00+0400"
      plus_five = "March 1, 2018 12:00:00+0500"
      raw_strings = [plus_four] * 5 + [plus_five] * 5
      expected = Index([parse(x) for x in raw_strings])

      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          parsed = to_datetime(raw_strings, cache=cache)
      tm.assert_index_equal(parsed, expected)
  def test_to_datetime_tz_pytz(self, cache):
      """pytz-localized datetimes convert to UTC with ``utc=True``."""
      # see gh-8260
      us_eastern = pytz.timezone("US/Eastern")
      january = us_eastern.localize(
          datetime(year=2000, month=1, day=1, hour=3, minute=0)
      )
      june = us_eastern.localize(
          datetime(year=2000, month=6, day=1, hour=3, minute=0)
      )
      localized = np.array([january, june], dtype=object)

      parsed = to_datetime(localized, utc=True, cache=cache)

      expected = DatetimeIndex(
          ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
          dtype="datetime64[ns, UTC]",
          freq=None,
      )
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      ("init_constructor", "end_constructor"),
      [
          (Index, DatetimeIndex),
          (list, DatetimeIndex),
          (np.array, DatetimeIndex),
          (Series, Series),
      ],
  )
  def test_to_datetime_utc_true(self, cache, init_constructor, end_constructor):
      """``utc=True`` with a format yields UTC results for each container."""
      # See gh-11934 & gh-6415
      raw = init_constructor(["20100102 121314", "20100102 121315"])
      parsed = to_datetime(raw, format="%Y%m%d %H%M%S", utc=True, cache=cache)

      expected = end_constructor(
          [
              Timestamp("2010-01-02 12:13:14", tz="utc"),
              Timestamp("2010-01-02 12:13:15", tz="utc"),
          ]
      )
      tm.assert_equal(parsed, expected)
  @pytest.mark.parametrize(
      ("scalar", "expected"),
      [
          ("20100102 121314", Timestamp("2010-01-02 12:13:14", tz="utc")),
          ("20100102 121315", Timestamp("2010-01-02 12:13:15", tz="utc")),
      ],
  )
  def test_to_datetime_utc_true_scalar(self, cache, scalar, expected):
      """``utc=True`` with a format yields a UTC Timestamp for a scalar."""
      # Test scalar case as well
      parsed = to_datetime(scalar, format="%Y%m%d %H%M%S", utc=True, cache=cache)
      assert parsed == expected
  def test_to_datetime_utc_true_with_series_single_value(self, cache):
      """A one-element numeric Series parses to a UTC Series with ``utc=True``."""
      # GH 15760 UTC=True with Series
      epoch_value = 1.5e18
      parsed = to_datetime(Series([epoch_value]), utc=True, cache=cache)
      tm.assert_series_equal(parsed, Series([Timestamp(epoch_value, tz="utc")]))
  def test_to_datetime_utc_true_with_series_tzaware_string(self, cache):
      """A Series of offset-bearing strings is converted to UTC."""
      offset_string = "2013-01-01 00:00:00-01:00"
      utc_wall_time = "2013-01-01 01:00:00"

      parsed = to_datetime(Series([offset_string] * 3), utc=True, cache=cache)

      expected = Series([Timestamp(utc_wall_time, tz="utc")] * 3)
      tm.assert_series_equal(parsed, expected)
  @pytest.mark.parametrize(
      ("date", "dtype"),
      [
          ("2013-01-01 01:00:00", "datetime64[ns]"),
          ("2013-01-01 01:00:00", "datetime64[ns, UTC]"),
      ],
  )
  def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype):
      """Naive and UTC datetime64 Series both come back as UTC with utc=True."""
      typed_input = Series([date], dtype=dtype)
      parsed = to_datetime(typed_input, utc=True, cache=cache)

      expected = Series(
          [Timestamp("2013-01-01 01:00:00", tz="UTC")], dtype="M8[ns, UTC]"
      )
      tm.assert_series_equal(parsed, expected)
  def test_to_datetime_tz_psycopg2(self, request, cache):
      """psycopg2 fixed-offset tzinfo objects are handled by ``to_datetime``.

      Skipped when ``psycopg2.tz`` cannot be imported.
      """
      # xref 8260
      psycopg2_tz = pytest.importorskip("psycopg2.tz")

      # misc cases
      minus_five_hours = psycopg2_tz.FixedOffsetTimezone(offset=-300, name=None)
      minus_four_hours = psycopg2_tz.FixedOffsetTimezone(offset=-240, name=None)
      aware_datetimes = np.array(
          [
              datetime(2000, 1, 1, 3, 0, tzinfo=minus_five_hours),
              datetime(2000, 6, 1, 3, 0, tzinfo=minus_four_hours),
          ],
          dtype=object,
      )

      as_utc = to_datetime(aware_datetimes, errors="coerce", utc=True, cache=cache)
      tm.assert_index_equal(
          as_utc,
          DatetimeIndex(
              ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
              dtype="datetime64[ns, UTC]",
              freq=None,
          ),
      )

      # dtype coercion
      fixed_offset_index = DatetimeIndex(
          ["2000-01-01 08:00:00"],
          tz=psycopg2_tz.FixedOffsetTimezone(offset=-300, name=None),
      )
      assert is_datetime64_ns_dtype(fixed_offset_index)

      # tz coercion
      unchanged = to_datetime(fixed_offset_index, errors="coerce", cache=cache)
      tm.assert_index_equal(unchanged, fixed_offset_index)

      converted = to_datetime(
          fixed_offset_index, errors="coerce", utc=True, cache=cache
      )
      tm.assert_index_equal(
          converted,
          DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]"),
      )
  @pytest.mark.parametrize("arg", [True, False])
  def test_datetime_bool(self, cache, arg):
      """A bool scalar raises TypeError, coerces to NaT, or is returned as is."""
      # GH13176
      with pytest.raises(
          TypeError, match=r"dtype bool cannot be converted to datetime64\[ns\]"
      ):
          to_datetime(arg)

      coerced = to_datetime(arg, errors="coerce", cache=cache)
      assert coerced is NaT

      ignored = to_datetime(arg, errors="ignore", cache=cache)
      assert ignored is arg
  def test_datetime_bool_arrays_mixed(self, cache):
      """Bools mixed into list input raise, or become NaT with ``errors="coerce"``."""
      # The rejected element is the bool ``False``. Name that type directly
      # instead of deriving it from ``type(cache)``: the fixture is unrelated
      # to the input and the old form only matched while ``cache`` itself
      # happened to be a bool.
      msg = f"{bool} is not convertible to datetime"
      with pytest.raises(TypeError, match=msg):
          to_datetime([False, datetime.today()], cache=cache)

      with pytest.raises(
          ValueError,
          match=(
              r'^time data "True" doesn\'t match format "%Y%m%d", '
              f"at position 1. {PARSING_ERR_MSG}$"
          ),
      ):
          to_datetime(["20130101", True], cache=cache)

      # Integer/float zero convert; the bool and NaT entries end up as NaT.
      epoch = to_datetime(0, cache=cache)
      tm.assert_index_equal(
          to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache),
          DatetimeIndex([epoch, NaT, NaT, epoch]),
      )
  @pytest.mark.parametrize("arg", [bool, to_datetime])
  def test_datetime_invalid_datatype(self, arg):
      """Passing a type object or a function raises ``TypeError``."""
      # GH13176
      with pytest.raises(TypeError, match="is not convertible to datetime"):
          to_datetime(arg)
  @pytest.mark.parametrize("errors", ["coerce", "raise", "ignore"])
  def test_invalid_format_raises(self, errors):
      """A malformed ``format`` raises ``ValueError`` for every ``errors`` mode."""
      # https://github.com/pandas-dev/pandas/issues/50255
      bad_directive = "':' is a bad directive in format 'H%:M%:S%"
      with pytest.raises(ValueError, match=bad_directive):
          to_datetime(["00:00:00"], format="H%:M%:S%", errors=errors)
  @pytest.mark.parametrize("value", ["a", "00:01:99"])
  @pytest.mark.parametrize("format", [None, "%H:%M:%S"])
  def test_datetime_invalid_scalar(self, value, format):
      """Unparseable scalars are echoed, coerced to NaT, or raise per ``errors``."""
      # GH24763
      assert to_datetime(value, errors="ignore", format=format) == value
      assert to_datetime(value, errors="coerce", format=format) is NaT

      # Any one of these alternatives is accepted for the raising case.
      accepted_messages = (
          r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
          f"{PARSING_ERR_MSG}$",
          r'^Given date string "a" not likely a datetime, at position 0$',
          r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
          f"at position 0. {PARSING_ERR_MSG}$",
          rf"^second must be in 0..59{NOT_99}: 00:01:99, at position 0$",
      )
      with pytest.raises(ValueError, match="|".join(accepted_messages)):
          to_datetime(value, errors="raise", format=format)
  @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
  @pytest.mark.parametrize("format", [None, "%H:%M:%S"])
  def test_datetime_outofbounds_scalar(self, value, format):
      """An out-of-range scalar is echoed, coerced to NaT, or raises per ``errors``."""
      # GH24763
      assert to_datetime(value, errors="ignore", format=format) == value
      assert to_datetime(value, errors="coerce", format=format) is NaT

      # The expected exception differs depending on whether a format is given.
      if format is None:
          exc_type = OutOfBoundsDatetime
          msg = "^Out of bounds .*, at position 0$"
      else:
          exc_type = ValueError
          msg = r'^time data ".*" doesn\'t match format ".*", at position 0.'

      with pytest.raises(exc_type, match=msg):
          to_datetime(value, errors="raise", format=format)
  @pytest.mark.parametrize(
      ("values"), [(["a"]), (["00:01:99"]), (["a", "b", "99:00:00"])]
  )
  @pytest.mark.parametrize("format", [(None), ("%H:%M:%S")])
  def test_datetime_invalid_index(self, values, format):
      """Unparseable list input is echoed, coerced to NaT, or raises per ``errors``."""
      # GH24763
      # Not great to have logic in tests, but this one's hard to
      # parametrise over
      warn = UserWarning if format is None and len(values) > 1 else None

      def expect_infer_warning():
          # Fresh context manager for each call; the same expectation is
          # applied to all three ``errors`` modes.
          return tm.assert_produces_warning(
              warn, match="Could not infer format", raise_on_extra_warnings=False
          )

      with expect_infer_warning():
          ignored = to_datetime(values, errors="ignore", format=format)
      echoed_dtype = "object" if format is None else "str"
      tm.assert_index_equal(ignored, Index(values, dtype=echoed_dtype))

      with expect_infer_warning():
          coerced = to_datetime(values, errors="coerce", format=format)
      tm.assert_index_equal(coerced, DatetimeIndex([NaT] * len(values)))

      accepted_messages = (
          r'^Given date string "a" not likely a datetime, at position 0$',
          r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
          f"{PARSING_ERR_MSG}$",
          r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
          f"at position 0. {PARSING_ERR_MSG}$",
          rf"^second must be in 0..59{NOT_99}: 00:01:99, at position 0$",
      )
      with pytest.raises(ValueError, match="|".join(accepted_messages)):
          with expect_infer_warning():
              to_datetime(values, errors="raise", format=format)
  @pytest.mark.parametrize("utc", [True, None])
  @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
  @pytest.mark.parametrize("constructor", [list, tuple, np.array, Index, deque])
  def test_to_datetime_cache(self, utc, format, constructor):
      """Cached and uncached conversion agree for each list-like container."""
      data = constructor(["20130101 00:00:00"] * 10**5)

      cached = to_datetime(data, utc=utc, format=format, cache=True)
      uncached = to_datetime(data, utc=utc, format=format, cache=False)
      tm.assert_index_equal(cached, uncached)
  def test_to_datetime_from_deque(self):
      """A deque of Timestamps converts the same way as the equivalent list."""
      # GH 29403
      stamps = [Timestamp("2010-06-02 09:30:00")] * 51
      from_deque = to_datetime(deque(stamps))
      from_list = to_datetime(stamps)
      tm.assert_index_equal(from_deque, from_list)
  @pytest.mark.parametrize("utc", [True, None])
  @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
  def test_to_datetime_cache_series(self, utc, format):
      """Cached and uncached conversion agree for Series input."""
      data = Series(["20130101 00:00:00"] * 10**5)

      cached = to_datetime(data, utc=utc, format=format, cache=True)
      uncached = to_datetime(data, utc=utc, format=format, cache=False)
      tm.assert_series_equal(cached, uncached)
  def test_to_datetime_cache_scalar(self):
      """``cache=True`` with a scalar string still returns the parsed Timestamp."""
      parsed = to_datetime("20130101 00:00:00", cache=True)
      assert parsed == Timestamp("20130101 00:00:00")
  @pytest.mark.parametrize(
      "datetimelikes,expected_values",
      (
          (
              (None, np.nan) + (NaT,) * start_caching_at,
              (NaT,) * (start_caching_at + 2),
          ),
          (
              (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
              (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
          ),
          (
              (None,)
              + (NaT,) * start_caching_at
              + ("2012 July 26", Timestamp("2012-07-26")),
              (NaT,) * (start_caching_at + 1)
              + (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
          ),
      ),
  )
  def test_convert_object_to_datetime_with_cache(
      self, datetimelikes, expected_values
  ):
      """Object Series sized from ``start_caching_at`` coerce to datetime64[ns]."""
      # GH#39882
      object_ser = Series(datetimelikes, dtype="object")
      expected = Series(expected_values, dtype="datetime64[ns]")

      converted = to_datetime(object_ser, errors="coerce")
      tm.assert_series_equal(converted, expected)
  @pytest.mark.parametrize("cache", [True, False])
  @pytest.mark.parametrize(
      "input",
      [
          Series([NaT] * 20 + [None] * 20, dtype="object"),
          Series([NaT] * 60 + [None] * 60, dtype="object"),
          Series([None] * 20),
          Series([None] * 60),
          Series([""] * 20),
          Series([""] * 60),
          Series([pd.NA] * 20),
          Series([pd.NA] * 60),
          Series([np.nan] * 20),
          Series([np.nan] * 60),
      ],
  )
  def test_to_datetime_converts_null_like_to_nat(self, cache, input):
      """Every null-like entry converts to NaT, with or without caching."""
      # GH35888
      converted = to_datetime(input, cache=cache)
      all_nat = Series([NaT] * len(input), dtype="M8[ns]")
      tm.assert_series_equal(converted, all_nat)
  @pytest.mark.parametrize(
      "date, format",
      [
          ("2017-20", "%Y-%W"),
          ("20 Sunday", "%W %A"),
          ("20 Sun", "%W %a"),
          ("2017-21", "%Y-%U"),
          ("20 Sunday", "%U %A"),
          ("20 Sun", "%U %a"),
      ],
  )
  def test_week_without_day_and_calendar_year(self, date, format):
      """Week directives lacking either the day or the year are rejected."""
      # GH16774
      with pytest.raises(
          ValueError, match="Cannot use '%W' or '%U' without day and year"
      ):
          to_datetime(date, format=format)
  def test_to_datetime_coerce(self):
      """Mixed-offset strings warn; the invalid entry is coerced to NaT."""
      # GH 26122
      ts_strings = [
          "March 1, 2018 12:00:00+0400",
          "March 1, 2018 12:00:00+0500",
          "20100240",
      ]
      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          result = to_datetime(ts_strings, errors="coerce")

      plus_four = tzoffset(None, 14400)
      plus_five = tzoffset(None, 18000)
      expected = Index(
          [
              datetime(2018, 3, 1, 12, 0, tzinfo=plus_four),
              datetime(2018, 3, 1, 12, 0, tzinfo=plus_five),
              NaT,
          ]
      )
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize(
      "string_arg, format",
      [("March 1, 2018", "%B %d, %Y"), ("2018-03-01", "%Y-%m-%d")],
  )
  @pytest.mark.parametrize(
      "outofbounds",
      [
          datetime(9999, 1, 1),
          date(9999, 1, 1),
          np.datetime64("9999-01-01"),
          "January 1, 9999",
          "9999-01-01",
      ],
  )
  def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds):
      """With ``errors="coerce"`` the year-9999 entry becomes NaT."""
      # https://github.com/pandas-dev/pandas/issues/50255
      coerced = to_datetime(
          [string_arg, outofbounds], errors="coerce", format=format
      )
      tm.assert_index_equal(coerced, DatetimeIndex([datetime(2018, 3, 1), NaT]))
  @pytest.mark.parametrize(
      "errors, expected",
      [
          ("coerce", Index([NaT, NaT])),
          ("ignore", Index(["200622-12-31", "111111-24-11"], dtype=object)),
      ],
  )
  def test_to_datetime_malformed_no_raise(self, errors, expected):
      """Malformed strings do not raise under ``coerce`` or ``ignore``."""
      # GH 28299
      # GH 48633
      malformed = ["200622-12-31", "111111-24-11"]
      expect_warning = tm.assert_produces_warning(
          UserWarning, match="Could not infer format", raise_on_extra_warnings=False
      )
      with expect_warning:
          result = to_datetime(malformed, errors=errors)
      tm.assert_index_equal(result, expected)
  def test_to_datetime_malformed_raise(self):
      """Malformed strings raise with ``errors="raise"`` and still warn."""
      # GH 48633
      malformed = ["200622-12-31", "111111-24-11"]
      msg = (
          'Parsed string "200622-12-31" gives an invalid tzoffset, which must '
          r"be between -timedelta\(hours=24\) and timedelta\(hours=24\), "
          "at position 0"
      )
      expect_error = pytest.raises(ValueError, match=msg)
      expect_warning = tm.assert_produces_warning(
          UserWarning, match="Could not infer format"
      )
      # Same nesting as before: the error context is outermost.
      with expect_error, expect_warning:
          to_datetime(malformed, errors="raise")
  def test_iso_8601_strings_with_same_offset(self):
      """A shared UTC offset is handled for scalar, list and DatetimeIndex input."""
      # GH 17697, 11736
      ts_str = "2015-11-18 15:30:00+05:30"
      assert to_datetime(ts_str) == Timestamp(ts_str)

      pair = [ts_str] * 2
      expected = DatetimeIndex([Timestamp(ts_str)] * 2)
      tm.assert_index_equal(to_datetime(pair), expected)
      tm.assert_index_equal(DatetimeIndex(pair), expected)
  def test_iso_8601_strings_with_different_offsets(self):
      """Differing offsets warn and give an Index of tz-aware datetimes."""
      # GH 17697, 11736, 50887
      ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          result = to_datetime(ts_strings)

      expected_values = np.array(
          [
              datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)),
              datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)),
              NaT,
          ],
          dtype=object,
      )
      # GH 21864
      tm.assert_index_equal(result, Index(expected_values))
  def test_iso_8601_strings_with_different_offsets_utc(self):
      """With ``utc=True`` both offset strings convert to the same UTC time."""
      ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
      converted = to_datetime(ts_strings, utc=True)

      ten_oclock = Timestamp(2015, 11, 18, 10)
      expected = DatetimeIndex([ten_oclock, ten_oclock, NaT], tz="UTC")
      tm.assert_index_equal(converted, expected)
  def test_mixed_offsets_with_native_datetime_raises(self):
      """Mixed-offset object input warns; re-parsing the result raises."""
      # GH 25978
      vals = [
          "nan",
          Timestamp("1990-01-01"),
          "2015-03-14T16:15:14.123-08:00",
          "2019-03-04T21:56:32.620-07:00",
          None,
          "today",
          "now",
      ]
      ser = Series(vals)
      # GH#40111: the Series must hold the very same objects
      assert all(ser[pos] is val for pos, val in enumerate(vals))

      now = Timestamp("now")
      today = Timestamp("today")
      warn_msg = "parsing datetimes with mixed time zones will raise an error"
      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          mixed = to_datetime(ser)

      fixed_part = Series(
          [
              "NaT",
              Timestamp("1990-01-01"),
              Timestamp("2015-03-14T16:15:14.123-08:00").to_pydatetime(),
              Timestamp("2019-03-04T21:56:32.620-07:00").to_pydatetime(),
              None,
          ],
          dtype=object,
      )
      tm.assert_series_equal(mixed[:-2], fixed_part)

      # we'll check mixed[-1] and mixed[-2] match now and today to within
      # call-timing tolerances
      # NOTE(review): ``now``/``today`` are captured before parsing, so these
      # differences are presumably non-positive and the bound is one-sided;
      # confirm whether an ``abs()`` was intended.
      parsed_now = mixed.iloc[-1]
      parsed_today = mixed.iloc[-2]
      assert (now - parsed_now).total_seconds() <= 0.1
      assert (today - parsed_today).total_seconds() <= 0.1

      with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
          to_datetime(mixed)
  def test_non_iso_strings_with_tz_offset(self):
      """Non-ISO strings carrying a ``+0400`` offset parse as tz-aware."""
      parsed = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)

      plus_four = timezone(timedelta(minutes=240))
      expected = DatetimeIndex([datetime(2018, 3, 1, 12, tzinfo=plus_four)] * 2)
      tm.assert_index_equal(parsed, expected)
  @pytest.mark.parametrize(
      "ts, expected",
      [
          (Timestamp("2018-01-01"), Timestamp("2018-01-01", tz="UTC")),
          (
              Timestamp("2018-01-01", tz="US/Pacific"),
              Timestamp("2018-01-01 08:00", tz="UTC"),
          ),
      ],
  )
  def test_timestamp_utc_true(self, ts, expected):
      """``utc=True`` on a Timestamp scalar returns the matching UTC Timestamp."""
      # GH 24415
      assert to_datetime(ts, utc=True) == expected
  @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
  def test_to_datetime_with_format_out_of_bounds(self, dt_str):
      """Dates outside the nanosecond range raise even with an explicit format."""
      # GH 9107
      with pytest.raises(
          OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
      ):
          to_datetime(dt_str, format="%Y%m%d")
  def test_to_datetime_utc(self):
      """``utc=True`` on an object array gives an index whose tz is ``timezone.utc``."""
      parsed_dt = parse("2012-06-13T01:39:00Z")
      converted = to_datetime(np.array([parsed_dt], dtype=object), utc=True)
      assert converted.tz is timezone.utc
  def test_to_datetime_fixed_offset(self):
      """Datetimes sharing one fixed-offset tzinfo keep that tz after conversion."""
      from pandas.tests.indexes.datetimes.test_timezones import FixedOffset

      fixed_off = FixedOffset(-420, "-07:00")
      dates = [datetime(2000, 1, day, tzinfo=fixed_off) for day in (1, 2, 3)]

      converted = to_datetime(dates)
      assert converted.tz == fixed_off
  @pytest.mark.parametrize(
      "date",
      [
          ["2020-10-26 00:00:00+06:00", "2020-10-26 00:00:00+01:00"],
          ["2020-10-26 00:00:00+06:00", Timestamp("2018-01-01", tz="US/Pacific")],
          [
              "2020-10-26 00:00:00+06:00",
              datetime(2020, 1, 1, 18, tzinfo=pytz.timezone("Australia/Melbourne")),
          ],
      ],
  )
  def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date):
      """Mixed time zones with ``utc=False`` emit the deprecation warning."""
      # GH 50887
      expect_warning = tm.assert_produces_warning(
          FutureWarning,
          match="parsing datetimes with mixed time zones will raise an error",
      )
      with expect_warning:
          to_datetime(date, utc=False)
  class TestToDatetimeUnit:
      """Tests for ``to_datetime`` on numeric, epoch-style input (mostly via ``unit``)."""

      @pytest.mark.parametrize("unit", ["Y", "M"])
      @pytest.mark.parametrize("item", [150, float(150)])
      def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
          """Year/month units agree with ``Timestamp`` for each list-like input."""
          # GH#50870 Note we have separate tests that pd.Timestamp gets these right
          expected = DatetimeIndex([Timestamp(item, unit=unit)], dtype="M8[ns]")

          list_like_inputs = (
              [item],
              np.array([item], dtype=object),
              np.array([item]),
          )
          for values in list_like_inputs:
              result = to_datetime(values, unit=unit, cache=cache)
              tm.assert_index_equal(result, expected)

          # with a nan!
          with_nan = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache)
          assert with_nan.isna()[1]
          tm.assert_index_equal(with_nan[:1], expected)

      @pytest.mark.parametrize("unit", ["Y", "M"])
      def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
          """Non-round floats are rejected for year/month units."""
          # GH#50301
          # Match Timestamp behavior in disallowing non-round floats with
          # Y or M unit
          warn_msg = "strings will be parsed as datetime strings"
          msg = f"Conversion of non-round float with unit={unit} is ambiguous"

          for non_round in ([1.5], np.array([1.5])):
              with pytest.raises(ValueError, match=msg):
                  to_datetime(non_round, unit=unit, errors="raise")

          with pytest.raises(ValueError, match=msg):
              with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                  to_datetime(["1.5"], unit=unit, errors="raise")

          # with errors="ignore" we also end up raising within the Timestamp
          # constructor; this may not be ideal
          with pytest.raises(ValueError, match=msg):
              to_datetime([1.5], unit=unit, errors="ignore")

          nat_index = Index([NaT], dtype="M8[ns]")
          coerced = to_datetime([1.5], unit=unit, errors="coerce")
          tm.assert_index_equal(coerced, nat_index)

          with tm.assert_produces_warning(FutureWarning, match=warn_msg):
              coerced = to_datetime(["1.5"], unit=unit, errors="coerce")
          tm.assert_index_equal(coerced, nat_index)

          # round floats are OK
          from_float = to_datetime([1.0], unit=unit)
          from_int = to_datetime([1], unit=unit)
          tm.assert_index_equal(from_float, from_int)

      def test_unit(self, cache):
          """``unit`` and ``format`` cannot be combined."""
          # GH 11758
          # test proper behavior with errors
          with pytest.raises(ValueError, match="cannot specify both format and unit"):
              to_datetime([1], unit="D", format="%Y%m%d", cache=cache)

      def test_unit_str(self, cache):
          """Numeric strings with ``unit`` match the equivalent integer input."""
          # GH 57051
          # Test that strs aren't dropping precision to 32-bit accidentally.
          with tm.assert_produces_warning(FutureWarning):
              from_str = to_datetime(["1704660000"], unit="s", origin="unix")
          from_int = to_datetime([1704660000], unit="s", origin="unix")
          tm.assert_index_equal(from_str, from_int)

      def test_unit_array_mixed_nans(self, cache):
          """Mixed numeric/null input with an out-of-range entry, per ``errors``."""
          values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]

          ignored = to_datetime(values, unit="D", errors="ignore", cache=cache)
          day_two = Timestamp("1970-01-02")
          expected_ignored = Index(
              [11111111111111111, day_two, day_two] + [NaT] * 5,
              dtype=object,
          )
          tm.assert_index_equal(ignored, expected_ignored)

          coerced = to_datetime(values, unit="D", errors="coerce", cache=cache)
          expected_coerced = DatetimeIndex(
              ["NaT", "1970-01-02", "1970-01-02"] + ["NaT"] * 5,
              dtype="M8[ns]",
          )
          tm.assert_index_equal(coerced, expected_coerced)

          msg = "cannot convert input 11111111111111111 with the unit 'D'"
          with pytest.raises(OutOfBoundsDatetime, match=msg):
              to_datetime(values, unit="D", errors="raise", cache=cache)

      def test_unit_array_mixed_nans_large_int(self, cache):
          """A very large int mixed with nulls, under each ``errors`` mode."""
          values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]

          ignored = to_datetime(values, errors="ignore", unit="s", cache=cache)
          expected_ignored = Index(
              [1420043460000000000000000] + [NaT] * 4, dtype=object
          )
          tm.assert_index_equal(ignored, expected_ignored)

          coerced = to_datetime(values, errors="coerce", unit="s", cache=cache)
          expected_coerced = DatetimeIndex(["NaT"] * 5, dtype="M8[ns]")
          tm.assert_index_equal(coerced, expected_coerced)

          msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
          with pytest.raises(OutOfBoundsDatetime, match=msg):
              to_datetime(values, errors="raise", unit="s", cache=cache)

      def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache):
          """A non-numeric string with ``unit`` raises ``ValueError``."""
          # if we have a string, then we raise a ValueError
          # and NOT an OutOfBoundsDatetime
          with pytest.raises(
              ValueError, match="non convertible value foo with the unit 's'"
          ):
              to_datetime("foo", errors="raise", unit="s", cache=cache)

      @pytest.mark.parametrize("error", ["raise", "coerce", "ignore"])
      def test_unit_consistency(self, cache, error):
          """A valid scalar gives the same Timestamp under every ``errors`` mode."""
          # consistency of conversions
          converted = to_datetime(11111111, unit="s", errors=error, cache=cache)
          assert converted == Timestamp("1970-05-09 14:25:11")
          assert isinstance(converted, Timestamp)

      @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"])
      @pytest.mark.parametrize("dtype", ["float64", "int64"])
      def test_unit_with_numeric(self, cache, errors, dtype):
          """Float and int arrays convert identically without an explicit unit."""
          # GH 13180
          # coercions from floats/ints are ok
          arr = np.array([1.434692e18, 1.432766e18]).astype(dtype)
          converted = to_datetime(arr, errors=errors, cache=cache)

          expected = DatetimeIndex(
              ["2015-06-19 05:33:20", "2015-05-27 22:33:20"], dtype="M8[ns]"
          )
          tm.assert_index_equal(converted, expected)

      @pytest.mark.parametrize(
          "exp, arr, warning",
          [
              [
                  ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
                  ["foo", 1.434692e18, 1.432766e18],
                  UserWarning,
              ],
              [
                  ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
                  [1.434692e18, 1.432766e18, "foo", "NaT"],
                  None,
              ],
          ],
      )
      def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
          """Strings mixed with numbers are coerced to NaT."""
          # but we want to make sure that we are coercing
          # if we have ints/strings
          with tm.assert_produces_warning(warning, match="Could not infer format"):
              coerced = to_datetime(arr, errors="coerce", cache=cache)
          tm.assert_index_equal(coerced, DatetimeIndex(exp, dtype="M8[ns]"))

      @pytest.mark.parametrize(
          "arr",
          [
              [Timestamp("20130101"), 1.434692e18, 1.432766e18],
              [1.434692e18, 1.432766e18, Timestamp("20130101")],
          ],
      )
      def test_unit_mixed(self, cache, arr):
          """Mixed numeric/Timestamp lists convert element-wise like ``Timestamp``."""
          # GH#50453 pre-2.0 with mixed numeric/datetimes and errors="coerce"
          # the numeric entries would be coerced to NaT, was never clear exactly
          # why.
          # mixed integers/datetimes
          expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")

          # GH#49037 pre-2.0 errors="raise" raised, but it always worked with
          # Series, was never clear why it was disallowed
          for mode in ("coerce", "raise"):
              converted = to_datetime(arr, errors=mode, cache=cache)
              tm.assert_index_equal(converted, expected)

          tm.assert_index_equal(DatetimeIndex(arr), expected)

      def test_unit_rounding(self, cache):
          """Float seconds are converted without premature rounding."""
          # GH 14156 & GH 20445: argument will incur floating point errors
          # but no premature rounding
          value = 1434743731.8770001
          converted = to_datetime(value, unit="s", cache=cache)
          assert converted == Timestamp("2015-06-19 19:55:31.877000093")

          via_timestamp = Timestamp(value, unit="s")
          assert via_timestamp == converted

      def test_unit_ignore_keeps_name(self, cache):
          """With ``errors="ignore"`` the returned Index keeps its name."""
          # GH 21697
          named = Index([15e9] * 2, name="name")
          returned = to_datetime(named, errors="ignore", unit="s", cache=cache)
          tm.assert_index_equal(returned, named)

      def test_to_datetime_errors_ignore_utc_true(self):
          """``utc=True`` is honoured together with ``errors="ignore"``."""
          # GH#23758
          converted = to_datetime([1], unit="s", utc=True, errors="ignore")
          expected = DatetimeIndex(["1970-01-01 00:00:01"], dtype="M8[ns, UTC]")
          tm.assert_index_equal(converted, expected)

      @pytest.mark.parametrize("dtype", [int, float])
      def test_to_datetime_unit(self, dtype):
          """Epoch seconds in a Series convert for int and float dtypes."""
          epoch = 1370745748
          ser = Series([epoch + offset for offset in range(20)]).astype(dtype)
          converted = to_datetime(ser, unit="s")

          start = Timestamp("2013-06-09 02:42:28")
          expected = Series(
              [start + timedelta(seconds=offset) for offset in range(20)],
              dtype="M8[ns]",
          )
          tm.assert_series_equal(converted, expected)

      @pytest.mark.parametrize("null", [iNaT, np.nan])
      def test_to_datetime_unit_with_nulls(self, null):
          """A trailing null sentinel becomes NaT when converting epoch seconds."""
          epoch = 1370745748
          ser = Series([epoch + offset for offset in range(20)] + [null])
          converted = to_datetime(ser, unit="s")

          start = Timestamp("2013-06-09 02:42:28")
          stamps = [start + timedelta(seconds=offset) for offset in range(20)]
          expected = Series(stamps + [NaT], dtype="M8[ns]")
          tm.assert_series_equal(converted, expected)

      def test_to_datetime_unit_fractional_seconds(self):
          """Fractional epoch seconds convert correctly after rounding to ms."""
          # GH13834
          epoch = 1370745748
          ser = Series(
              [epoch + frac for frac in np.arange(0, 2, 0.25)] + [iNaT]
          ).astype(float)
          converted = to_datetime(ser, unit="s")

          start = Timestamp("2013-06-09 02:42:28")
          stamps = [
              start + timedelta(seconds=frac) for frac in np.arange(0, 2, 0.25)
          ]
          expected = Series(stamps + [NaT], dtype="M8[ns]")

          # GH20455 argument will incur floating point errors but no premature rounding
          converted = converted.round("ms")
          tm.assert_series_equal(converted, expected)

      def test_to_datetime_unit_na_values(self):
          """Null-like entries become NaT alongside valid day counts."""
          converted = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D")
          expected = DatetimeIndex(
              [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3,
              dtype="M8[ns]",
          )
          tm.assert_index_equal(converted, expected)

      @pytest.mark.parametrize("bad_val", ["foo", 111111111])
      def test_to_datetime_unit_invalid(self, bad_val):
          """An unconvertible entry raises, naming the value and the unit."""
          with pytest.raises(ValueError, match=f"{bad_val} with the unit 'D'"):
              to_datetime([1, 2, bad_val], unit="D")

      @pytest.mark.parametrize("bad_val", ["foo", 111111111])
      def test_to_timestamp_unit_coerce(self, bad_val):
          """With ``errors="coerce"`` the unconvertible entry becomes NaT."""
          # coerce we can process
          coerced = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
          expected = DatetimeIndex(
              [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1,
              dtype="M8[ns]",
          )
          tm.assert_index_equal(coerced, expected)

      def test_float_to_datetime_raise_near_bounds(self):
          """Floats just inside the ns bounds convert; just outside they raise."""
          # GH50183
          msg = "cannot convert input with unit 'D'"
          oneday_in_ns = 1e9 * 60 * 60 * 24
          tsmax_in_days = 2**63 / oneday_in_ns  # 2**63 ns, in days

          # just in bounds
          should_succeed = Series(
              [0, tsmax_in_days - 0.005, -tsmax_in_days + 0.005], dtype=float
          )
          expected = (should_succeed * oneday_in_ns).astype(np.int64)
          for error_mode in ["raise", "coerce", "ignore"]:
              in_bounds = to_datetime(should_succeed, unit="D", errors=error_mode)
              # Cast to `np.float64` so that `rtol` and inexact checking kick in
              # (`check_exact` doesn't take place for integer dtypes)
              tm.assert_almost_equal(
                  in_bounds.astype(np.int64).astype(np.float64),
                  expected.astype(np.float64),
                  rtol=1e-10,
              )

          # just out of bounds
          too_large = Series([0, tsmax_in_days + 0.005], dtype=float)
          too_small = Series([0, -tsmax_in_days - 0.005], dtype=float)
          for out_of_bounds in (too_large, too_small):
              with pytest.raises(OutOfBoundsDatetime, match=msg):
                  to_datetime(out_of_bounds, unit="D", errors="raise")
  1964. class TestToDatetimeDataFrame:
  @pytest.fixture
  def df(self):
      """Two-row frame with one column per datetime component."""
      components = {
          "year": [2015, 2016],
          "month": [2, 3],
          "day": [4, 5],
          "hour": [6, 7],
          "minute": [58, 59],
          "second": [10, 11],
          "ms": [1, 1],
          "us": [2, 2],
          "ns": [3, 3],
      }
      return DataFrame(components)
  def test_dataframe(self, df, cache):
      """Year/month/day assemble from a dict of Series and from a nested dict."""
      expected = Series(
          [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:0:00")]
      )

      series_by_field = {key: df[key] for key in ("year", "month", "day")}
      assembled = to_datetime(series_by_field, cache=cache)
      tm.assert_series_equal(assembled, expected)

      # dict-like
      nested_dict = df[["year", "month", "day"]].to_dict()
      assembled = to_datetime(nested_dict, cache=cache)
      tm.assert_series_equal(assembled, expected)
  def test_dataframe_dict_with_constructable(self, df, cache):
      """A dict whose ``month`` entry is a scalar is still assembled."""
      # dict but with constructable
      fields = df[["year", "month", "day"]].to_dict()
      fields["month"] = 2

      assembled = to_datetime(fields, cache=cache)
      expected = Series(
          [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")]
      )
      tm.assert_series_equal(assembled, expected)
  @pytest.mark.parametrize(
      "unit",
      [
          {
              "year": "years",
              "month": "months",
              "day": "days",
              "hour": "hours",
              "minute": "minutes",
              "second": "seconds",
          },
          {
              "year": "year",
              "month": "month",
              "day": "day",
              "hour": "hour",
              "minute": "minute",
              "second": "second",
          },
      ],
  )
  def test_dataframe_field_aliases_column_subset(self, df, cache, unit):
      """Singular and plural column names are both accepted for assembly."""
      # unit mappings
      renamed = df[list(unit)].rename(columns=unit)
      assembled = to_datetime(renamed, cache=cache)

      expected = Series(
          [Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")],
          dtype="M8[ns]",
      )
      tm.assert_series_equal(assembled, expected)
  def test_dataframe_field_aliases(self, df, cache):
      """All nine component columns assemble down to nanosecond precision."""
      field_names = (
          "year",
          "month",
          "day",
          "hour",
          "minute",
          "second",
          "ms",
          "us",
          "ns",
      )
      # Each name maps to itself, so the rename leaves the columns unchanged.
      d = {name: name for name in field_names}
      assembled = to_datetime(df.rename(columns=d), cache=cache)

      expected = Series(
          [
              Timestamp("20150204 06:58:10.001002003"),
              Timestamp("20160305 07:59:11.001002003"),
          ]
      )
      tm.assert_series_equal(assembled, expected)
  def test_dataframe_str_dtype(self, df, cache):
      """String-typed component columns assemble to the same timestamps."""
      # coerce back to int
      as_strings = df.astype(str)
      assembled = to_datetime(as_strings, cache=cache)

      expected = Series(
          [
              Timestamp("20150204 06:58:10.001002003"),
              Timestamp("20160305 07:59:11.001002003"),
          ]
      )
      tm.assert_series_equal(assembled, expected)
  2059. def test_dataframe_coerce(self, cache):
  2060. # passing coerce
  2061. df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
  2062. msg = (
  2063. r'^cannot assemble the datetimes: time data ".+" doesn\'t '
  2064. r'match format "%Y%m%d", at position 1\.'
  2065. )
  2066. with pytest.raises(ValueError, match=msg):
  2067. to_datetime(df2, cache=cache)
  2068. result = to_datetime(df2, errors="coerce", cache=cache)
  2069. expected = Series([Timestamp("20150204 00:00:00"), NaT])
  2070. tm.assert_series_equal(result, expected)
  2071. def test_dataframe_extra_keys_raisesm(self, df, cache):
  2072. # extra columns
  2073. msg = r"extra keys have been passed to the datetime assemblage: \[foo\]"
  2074. with pytest.raises(ValueError, match=msg):
  2075. df2 = df.copy()
  2076. df2["foo"] = 1
  2077. to_datetime(df2, cache=cache)
  2078. @pytest.mark.parametrize(
  2079. "cols",
  2080. [
  2081. ["year"],
  2082. ["year", "month"],
  2083. ["year", "month", "second"],
  2084. ["month", "day"],
  2085. ["year", "day", "second"],
  2086. ],
  2087. )
  2088. def test_dataframe_missing_keys_raises(self, df, cache, cols):
  2089. # not enough
  2090. msg = (
  2091. r"to assemble mappings requires at least that \[year, month, "
  2092. r"day\] be specified: \[.+\] is missing"
  2093. )
  2094. with pytest.raises(ValueError, match=msg):
  2095. to_datetime(df[cols], cache=cache)
  2096. def test_dataframe_duplicate_columns_raises(self, cache):
  2097. # duplicates
  2098. msg = "cannot assemble with duplicate keys"
  2099. df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
  2100. df2.columns = ["year", "year", "day"]
  2101. with pytest.raises(ValueError, match=msg):
  2102. to_datetime(df2, cache=cache)
  2103. df2 = DataFrame(
  2104. {"year": [2015, 2016], "month": [2, 20], "day": [4, 5], "hour": [4, 5]}
  2105. )
  2106. df2.columns = ["year", "month", "day", "day"]
  2107. with pytest.raises(ValueError, match=msg):
  2108. to_datetime(df2, cache=cache)
  2109. def test_dataframe_int16(self, cache):
  2110. # GH#13451
  2111. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  2112. # int16
  2113. result = to_datetime(df.astype("int16"), cache=cache)
  2114. expected = Series(
  2115. [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")]
  2116. )
  2117. tm.assert_series_equal(result, expected)
  2118. def test_dataframe_mixed(self, cache):
  2119. # mixed dtypes
  2120. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  2121. df["month"] = df["month"].astype("int8")
  2122. df["day"] = df["day"].astype("int8")
  2123. result = to_datetime(df, cache=cache)
  2124. expected = Series(
  2125. [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")]
  2126. )
  2127. tm.assert_series_equal(result, expected)
  2128. def test_dataframe_float(self, cache):
  2129. # float
  2130. df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
  2131. msg = (
  2132. r"^cannot assemble the datetimes: unconverted data remains when parsing "
  2133. r'with format ".*": "1", at position 0.'
  2134. )
  2135. with pytest.raises(ValueError, match=msg):
  2136. to_datetime(df, cache=cache)
  2137. def test_dataframe_utc_true(self):
  2138. # GH#23760
  2139. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  2140. result = to_datetime(df, utc=True)
  2141. expected = Series(
  2142. np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
  2143. ).dt.tz_localize("UTC")
  2144. tm.assert_series_equal(result, expected)
  2145. class TestToDatetimeMisc:
  2146. def test_to_datetime_barely_out_of_bounds(self):
  2147. # GH#19529
  2148. # GH#19382 close enough to bounds that dropping nanos would result
  2149. # in an in-bounds datetime
  2150. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
  2151. msg = "^Out of bounds nanosecond timestamp: .*, at position 0"
  2152. with pytest.raises(OutOfBoundsDatetime, match=msg):
  2153. to_datetime(arr)
  2154. @pytest.mark.parametrize(
  2155. "arg, exp_str",
  2156. [
  2157. ["2012-01-01 00:00:00", "2012-01-01 00:00:00"],
  2158. ["20121001", "2012-10-01"], # bad iso 8601
  2159. ],
  2160. )
  2161. def test_to_datetime_iso8601(self, cache, arg, exp_str):
  2162. result = to_datetime([arg], cache=cache)
  2163. exp = Timestamp(exp_str)
  2164. assert result[0] == exp
  2165. @pytest.mark.parametrize(
  2166. "input, format",
  2167. [
  2168. ("2012", "%Y-%m"),
  2169. ("2012-01", "%Y-%m-%d"),
  2170. ("2012-01-01", "%Y-%m-%d %H"),
  2171. ("2012-01-01 10", "%Y-%m-%d %H:%M"),
  2172. ("2012-01-01 10:00", "%Y-%m-%d %H:%M:%S"),
  2173. ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M:%S.%f"),
  2174. ("2012-01-01 10:00:00.123", "%Y-%m-%d %H:%M:%S.%f%z"),
  2175. (0, "%Y-%m-%d"),
  2176. ],
  2177. )
  2178. @pytest.mark.parametrize("exact", [True, False])
  2179. def test_to_datetime_iso8601_fails(self, input, format, exact):
  2180. # https://github.com/pandas-dev/pandas/issues/12649
  2181. # `format` is longer than the string, so this fails regardless of `exact`
  2182. with pytest.raises(
  2183. ValueError,
  2184. match=(
  2185. rf"time data \"{input}\" doesn't match format "
  2186. rf"\"{format}\", at position 0"
  2187. ),
  2188. ):
  2189. to_datetime(input, format=format, exact=exact)
  2190. @pytest.mark.parametrize(
  2191. "input, format",
  2192. [
  2193. ("2012-01-01", "%Y-%m"),
  2194. ("2012-01-01 10", "%Y-%m-%d"),
  2195. ("2012-01-01 10:00", "%Y-%m-%d %H"),
  2196. ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M"),
  2197. (0, "%Y-%m-%d"),
  2198. ],
  2199. )
  2200. def test_to_datetime_iso8601_exact_fails(self, input, format):
  2201. # https://github.com/pandas-dev/pandas/issues/12649
  2202. # `format` is shorter than the date string, so only fails with `exact=True`
  2203. msg = "|".join(
  2204. [
  2205. '^unconverted data remains when parsing with format ".*": ".*"'
  2206. f", at position 0. {PARSING_ERR_MSG}$",
  2207. f'^time data ".*" doesn\'t match format ".*", at position 0. '
  2208. f"{PARSING_ERR_MSG}$",
  2209. ]
  2210. )
  2211. with pytest.raises(
  2212. ValueError,
  2213. match=(msg),
  2214. ):
  2215. to_datetime(input, format=format)
  2216. @pytest.mark.parametrize(
  2217. "input, format",
  2218. [
  2219. ("2012-01-01", "%Y-%m"),
  2220. ("2012-01-01 00", "%Y-%m-%d"),
  2221. ("2012-01-01 00:00", "%Y-%m-%d %H"),
  2222. ("2012-01-01 00:00:00", "%Y-%m-%d %H:%M"),
  2223. ],
  2224. )
  2225. def test_to_datetime_iso8601_non_exact(self, input, format):
  2226. # https://github.com/pandas-dev/pandas/issues/12649
  2227. expected = Timestamp(2012, 1, 1)
  2228. result = to_datetime(input, format=format, exact=False)
  2229. assert result == expected
  2230. @pytest.mark.parametrize(
  2231. "input, format",
  2232. [
  2233. ("2020-01", "%Y/%m"),
  2234. ("2020-01-01", "%Y/%m/%d"),
  2235. ("2020-01-01 00", "%Y/%m/%dT%H"),
  2236. ("2020-01-01T00", "%Y/%m/%d %H"),
  2237. ("2020-01-01 00:00", "%Y/%m/%dT%H:%M"),
  2238. ("2020-01-01T00:00", "%Y/%m/%d %H:%M"),
  2239. ("2020-01-01 00:00:00", "%Y/%m/%dT%H:%M:%S"),
  2240. ("2020-01-01T00:00:00", "%Y/%m/%d %H:%M:%S"),
  2241. ],
  2242. )
  2243. def test_to_datetime_iso8601_separator(self, input, format):
  2244. # https://github.com/pandas-dev/pandas/issues/12649
  2245. with pytest.raises(
  2246. ValueError,
  2247. match=(
  2248. rf"time data \"{input}\" doesn\'t match format "
  2249. rf"\"{format}\", at position 0"
  2250. ),
  2251. ):
  2252. to_datetime(input, format=format)
  2253. @pytest.mark.parametrize(
  2254. "input, format",
  2255. [
  2256. ("2020-01", "%Y-%m"),
  2257. ("2020-01-01", "%Y-%m-%d"),
  2258. ("2020-01-01 00", "%Y-%m-%d %H"),
  2259. ("2020-01-01T00", "%Y-%m-%dT%H"),
  2260. ("2020-01-01 00:00", "%Y-%m-%d %H:%M"),
  2261. ("2020-01-01T00:00", "%Y-%m-%dT%H:%M"),
  2262. ("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"),
  2263. ("2020-01-01T00:00:00", "%Y-%m-%dT%H:%M:%S"),
  2264. ("2020-01-01T00:00:00.000", "%Y-%m-%dT%H:%M:%S.%f"),
  2265. ("2020-01-01T00:00:00.000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2266. ("2020-01-01T00:00:00.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2267. ],
  2268. )
  2269. def test_to_datetime_iso8601_valid(self, input, format):
  2270. # https://github.com/pandas-dev/pandas/issues/12649
  2271. expected = Timestamp(2020, 1, 1)
  2272. result = to_datetime(input, format=format)
  2273. assert result == expected
  2274. @pytest.mark.parametrize(
  2275. "input, format",
  2276. [
  2277. ("2020-1", "%Y-%m"),
  2278. ("2020-1-1", "%Y-%m-%d"),
  2279. ("2020-1-1 0", "%Y-%m-%d %H"),
  2280. ("2020-1-1T0", "%Y-%m-%dT%H"),
  2281. ("2020-1-1 0:0", "%Y-%m-%d %H:%M"),
  2282. ("2020-1-1T0:0", "%Y-%m-%dT%H:%M"),
  2283. ("2020-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"),
  2284. ("2020-1-1T0:0:0", "%Y-%m-%dT%H:%M:%S"),
  2285. ("2020-1-1T0:0:0.000", "%Y-%m-%dT%H:%M:%S.%f"),
  2286. ("2020-1-1T0:0:0.000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2287. ("2020-1-1T0:0:0.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2288. ],
  2289. )
  2290. def test_to_datetime_iso8601_non_padded(self, input, format):
  2291. # https://github.com/pandas-dev/pandas/issues/21422
  2292. expected = Timestamp(2020, 1, 1)
  2293. result = to_datetime(input, format=format)
  2294. assert result == expected
  2295. @pytest.mark.parametrize(
  2296. "input, format",
  2297. [
  2298. ("2020-01-01T00:00:00.000000000+00:00", "%Y-%m-%dT%H:%M:%S.%f%z"),
  2299. ("2020-01-01T00:00:00+00:00", "%Y-%m-%dT%H:%M:%S%z"),
  2300. ("2020-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"),
  2301. ],
  2302. )
  2303. def test_to_datetime_iso8601_with_timezone_valid(self, input, format):
  2304. # https://github.com/pandas-dev/pandas/issues/12649
  2305. expected = Timestamp(2020, 1, 1, tzinfo=pytz.UTC)
  2306. result = to_datetime(input, format=format)
  2307. assert result == expected
  2308. def test_to_datetime_default(self, cache):
  2309. rs = to_datetime("2001", cache=cache)
  2310. xp = datetime(2001, 1, 1)
  2311. assert rs == xp
  2312. @pytest.mark.xfail(reason="fails to enforce dayfirst=True, which would raise")
  2313. def test_to_datetime_respects_dayfirst(self, cache):
  2314. # dayfirst is essentially broken
  2315. # The msg here is not important since it isn't actually raised yet.
  2316. msg = "Invalid date specified"
  2317. with pytest.raises(ValueError, match=msg):
  2318. # if dayfirst is respected, then this would parse as month=13, which
  2319. # would raise
  2320. with tm.assert_produces_warning(UserWarning, match="Provide format"):
  2321. to_datetime("01-13-2012", dayfirst=True, cache=cache)
  2322. def test_to_datetime_on_datetime64_series(self, cache):
  2323. # #2699
  2324. ser = Series(date_range("1/1/2000", periods=10))
  2325. result = to_datetime(ser, cache=cache)
  2326. assert result[0] == ser[0]
  2327. def test_to_datetime_with_space_in_series(self, cache):
  2328. # GH 6428
  2329. ser = Series(["10/18/2006", "10/18/2008", " "])
  2330. msg = (
  2331. r'^time data " " doesn\'t match format "%m/%d/%Y", '
  2332. rf"at position 2. {PARSING_ERR_MSG}$"
  2333. )
  2334. with pytest.raises(ValueError, match=msg):
  2335. to_datetime(ser, errors="raise", cache=cache)
  2336. result_coerce = to_datetime(ser, errors="coerce", cache=cache)
  2337. expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT])
  2338. tm.assert_series_equal(result_coerce, expected_coerce)
  2339. result_ignore = to_datetime(ser, errors="ignore", cache=cache)
  2340. tm.assert_series_equal(result_ignore, ser)
  2341. @td.skip_if_not_us_locale
  2342. def test_to_datetime_with_apply(self, cache):
  2343. # this is only locale tested with US/None locales
  2344. # GH 5195
  2345. # with a format and coerce a single item to_datetime fails
  2346. td = Series(["May 04", "Jun 02", "Dec 11"], index=[1, 2, 3])
  2347. expected = to_datetime(td, format="%b %y", cache=cache)
  2348. result = td.apply(to_datetime, format="%b %y", cache=cache)
  2349. tm.assert_series_equal(result, expected)
  2350. def test_to_datetime_timezone_name(self):
  2351. # https://github.com/pandas-dev/pandas/issues/49748
  2352. result = to_datetime("2020-01-01 00:00:00UTC", format="%Y-%m-%d %H:%M:%S%Z")
  2353. expected = Timestamp(2020, 1, 1).tz_localize("UTC")
  2354. assert result == expected
  2355. @td.skip_if_not_us_locale
  2356. @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  2357. def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
  2358. # this is only locale tested with US/None locales
  2359. # GH 5195, GH50251
  2360. # with a format and coerce a single item to_datetime fails
  2361. td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
  2362. expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)
  2363. result = td.apply(
  2364. lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
  2365. )
  2366. tm.assert_series_equal(result, expected)
  2367. def test_to_datetime_empty_stt(self, cache):
  2368. # empty string
  2369. result = to_datetime("", cache=cache)
  2370. assert result is NaT
  2371. def test_to_datetime_empty_str_list(self, cache):
  2372. result = to_datetime(["", ""], cache=cache)
  2373. assert isna(result).all()
  2374. def test_to_datetime_zero(self, cache):
  2375. # ints
  2376. result = Timestamp(0)
  2377. expected = to_datetime(0, cache=cache)
  2378. assert result == expected
  2379. def test_to_datetime_strings(self, cache):
  2380. # GH 3888 (strings)
  2381. expected = to_datetime(["2012"], cache=cache)[0]
  2382. result = to_datetime("2012", cache=cache)
  2383. assert result == expected
  2384. def test_to_datetime_strings_variation(self, cache):
  2385. array = ["2012", "20120101", "20120101 12:01:01"]
  2386. expected = [to_datetime(dt_str, cache=cache) for dt_str in array]
  2387. result = [Timestamp(date_str) for date_str in array]
  2388. tm.assert_almost_equal(result, expected)
  2389. @pytest.mark.parametrize("result", [Timestamp("2012"), to_datetime("2012")])
  2390. def test_to_datetime_strings_vs_constructor(self, result):
  2391. expected = Timestamp(2012, 1, 1)
  2392. assert result == expected
  2393. def test_to_datetime_unprocessable_input(self, cache):
  2394. # GH 4928
  2395. # GH 21864
  2396. result = to_datetime([1, "1"], errors="ignore", cache=cache)
  2397. expected = Index(np.array([1, "1"], dtype="O"))
  2398. tm.assert_equal(result, expected)
  2399. msg = '^Given date string "1" not likely a datetime, at position 1$'
  2400. with pytest.raises(ValueError, match=msg):
  2401. to_datetime([1, "1"], errors="raise", cache=cache)
  2402. def test_to_datetime_unhashable_input(self, cache):
  2403. series = Series([["a"]] * 100)
  2404. result = to_datetime(series, errors="ignore", cache=cache)
  2405. tm.assert_series_equal(series, result)
  2406. def test_to_datetime_other_datetime64_units(self):
  2407. # 5/25/2012
  2408. scalar = np.int64(1337904000000000).view("M8[us]")
  2409. as_obj = scalar.astype("O")
  2410. index = DatetimeIndex([scalar])
  2411. assert index[0] == scalar.astype("O")
  2412. value = Timestamp(scalar)
  2413. assert value == as_obj
  2414. def test_to_datetime_list_of_integers(self):
  2415. rng = date_range("1/1/2000", periods=20)
  2416. rng = DatetimeIndex(rng.values)
  2417. ints = list(rng.asi8)
  2418. result = DatetimeIndex(ints)
  2419. tm.assert_index_equal(rng, result)
  2420. def test_to_datetime_overflow(self):
  2421. # gh-17637
  2422. # we are overflowing Timedelta range here
  2423. msg = "Cannot cast 139999 days 00:00:00 to unit='ns' without overflow"
  2424. with pytest.raises(OutOfBoundsTimedelta, match=msg):
  2425. date_range(start="1/1/1700", freq="B", periods=100000)
  2426. def test_string_invalid_operation(self, cache):
  2427. invalid = np.array(["87156549591102612381000001219H5"], dtype=object)
  2428. # GH #51084
  2429. with pytest.raises(ValueError, match="Unknown datetime string format"):
  2430. to_datetime(invalid, errors="raise", cache=cache)
  2431. def test_string_na_nat_conversion(self, cache):
  2432. # GH #999, #858
  2433. strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object)
  2434. expected = np.empty(4, dtype="M8[ns]")
  2435. for i, val in enumerate(strings):
  2436. if isna(val):
  2437. expected[i] = iNaT
  2438. else:
  2439. expected[i] = parse(val)
  2440. result = tslib.array_to_datetime(strings)[0]
  2441. tm.assert_almost_equal(result, expected)
  2442. result2 = to_datetime(strings, cache=cache)
  2443. assert isinstance(result2, DatetimeIndex)
  2444. tm.assert_numpy_array_equal(result, result2.values)
  2445. def test_string_na_nat_conversion_malformed(self, cache):
  2446. malformed = np.array(["1/100/2000", np.nan], dtype=object)
  2447. # GH 10636, default is now 'raise'
  2448. msg = r"Unknown datetime string format"
  2449. with pytest.raises(ValueError, match=msg):
  2450. to_datetime(malformed, errors="raise", cache=cache)
  2451. result = to_datetime(malformed, errors="ignore", cache=cache)
  2452. # GH 21864
  2453. expected = Index(malformed, dtype=object)
  2454. tm.assert_index_equal(result, expected)
  2455. with pytest.raises(ValueError, match=msg):
  2456. to_datetime(malformed, errors="raise", cache=cache)
  2457. def test_string_na_nat_conversion_with_name(self, cache):
  2458. idx = ["a", "b", "c", "d", "e"]
  2459. series = Series(
  2460. ["1/1/2000", np.nan, "1/3/2000", np.nan, "1/5/2000"], index=idx, name="foo"
  2461. )
  2462. dseries = Series(
  2463. [
  2464. to_datetime("1/1/2000", cache=cache),
  2465. np.nan,
  2466. to_datetime("1/3/2000", cache=cache),
  2467. np.nan,
  2468. to_datetime("1/5/2000", cache=cache),
  2469. ],
  2470. index=idx,
  2471. name="foo",
  2472. )
  2473. result = to_datetime(series, cache=cache)
  2474. dresult = to_datetime(dseries, cache=cache)
  2475. expected = Series(np.empty(5, dtype="M8[ns]"), index=idx)
  2476. for i in range(5):
  2477. x = series.iloc[i]
  2478. if isna(x):
  2479. expected.iloc[i] = NaT
  2480. else:
  2481. expected.iloc[i] = to_datetime(x, cache=cache)
  2482. tm.assert_series_equal(result, expected, check_names=False)
  2483. assert result.name == "foo"
  2484. tm.assert_series_equal(dresult, expected, check_names=False)
  2485. assert dresult.name == "foo"
  2486. @pytest.mark.parametrize(
  2487. "unit",
  2488. ["h", "m", "s", "ms", "us", "ns"],
  2489. )
  2490. def test_dti_constructor_numpy_timeunits(self, cache, unit):
  2491. # GH 9114
  2492. dtype = np.dtype(f"M8[{unit}]")
  2493. base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache)
  2494. values = base.values.astype(dtype)
  2495. if unit in ["h", "m"]:
  2496. # we cast to closest supported unit
  2497. unit = "s"
  2498. exp_dtype = np.dtype(f"M8[{unit}]")
  2499. expected = DatetimeIndex(base.astype(exp_dtype))
  2500. assert expected.dtype == exp_dtype
  2501. tm.assert_index_equal(DatetimeIndex(values), expected)
  2502. tm.assert_index_equal(to_datetime(values, cache=cache), expected)
  2503. def test_dayfirst(self, cache):
  2504. # GH 5917
  2505. arr = ["10/02/2014", "11/02/2014", "12/02/2014"]
  2506. expected = DatetimeIndex(
  2507. [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]
  2508. )
  2509. idx1 = DatetimeIndex(arr, dayfirst=True)
  2510. idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
  2511. idx3 = to_datetime(arr, dayfirst=True, cache=cache)
  2512. idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache)
  2513. idx5 = DatetimeIndex(Index(arr), dayfirst=True)
  2514. idx6 = DatetimeIndex(Series(arr), dayfirst=True)
  2515. tm.assert_index_equal(expected, idx1)
  2516. tm.assert_index_equal(expected, idx2)
  2517. tm.assert_index_equal(expected, idx3)
  2518. tm.assert_index_equal(expected, idx4)
  2519. tm.assert_index_equal(expected, idx5)
  2520. tm.assert_index_equal(expected, idx6)
  2521. def test_dayfirst_warnings_valid_input(self):
  2522. # GH 12585
  2523. warning_msg = (
  2524. "Parsing dates in .* format when dayfirst=.* was specified. "
  2525. "Pass `dayfirst=.*` or specify a format to silence this warning."
  2526. )
  2527. # CASE 1: valid input
  2528. arr = ["31/12/2014", "10/03/2011"]
  2529. expected = DatetimeIndex(
  2530. ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
  2531. )
  2532. # A. dayfirst arg correct, no warning
  2533. res1 = to_datetime(arr, dayfirst=True)
  2534. tm.assert_index_equal(expected, res1)
  2535. # B. dayfirst arg incorrect, warning
  2536. with tm.assert_produces_warning(UserWarning, match=warning_msg):
  2537. res2 = to_datetime(arr, dayfirst=False)
  2538. tm.assert_index_equal(expected, res2)
  2539. def test_dayfirst_warnings_invalid_input(self):
  2540. # CASE 2: invalid input
  2541. # cannot consistently process with single format
  2542. # ValueError *always* raised
  2543. # first in DD/MM/YYYY, second in MM/DD/YYYY
  2544. arr = ["31/12/2014", "03/30/2011"]
  2545. with pytest.raises(
  2546. ValueError,
  2547. match=(
  2548. r'^time data "03/30/2011" doesn\'t match format '
  2549. rf'"%d/%m/%Y", at position 1. {PARSING_ERR_MSG}$'
  2550. ),
  2551. ):
  2552. to_datetime(arr, dayfirst=True)
  2553. @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray._from_sequence])
  2554. def test_to_datetime_dta_tz(self, klass):
  2555. # GH#27733
  2556. dti = date_range("2015-04-05", periods=3).rename("foo")
  2557. expected = dti.tz_localize("UTC")
  2558. obj = klass(dti)
  2559. expected = klass(expected)
  2560. result = to_datetime(obj, utc=True)
  2561. tm.assert_equal(result, expected)
  2562. class TestGuessDatetimeFormat:
  2563. @pytest.mark.parametrize(
  2564. "test_list",
  2565. [
  2566. [
  2567. "2011-12-30 00:00:00.000000",
  2568. "2011-12-30 00:00:00.000000",
  2569. "2011-12-30 00:00:00.000000",
  2570. ],
  2571. [np.nan, np.nan, "2011-12-30 00:00:00.000000"],
  2572. ["", "2011-12-30 00:00:00.000000"],
  2573. ["NaT", "2011-12-30 00:00:00.000000"],
  2574. ["2011-12-30 00:00:00.000000", "random_string"],
  2575. ["now", "2011-12-30 00:00:00.000000"],
  2576. ["today", "2011-12-30 00:00:00.000000"],
  2577. ],
  2578. )
  2579. def test_guess_datetime_format_for_array(self, test_list):
  2580. expected_format = "%Y-%m-%d %H:%M:%S.%f"
  2581. test_array = np.array(test_list, dtype=object)
  2582. assert tools._guess_datetime_format_for_array(test_array) == expected_format
  2583. @td.skip_if_not_us_locale
  2584. def test_guess_datetime_format_for_array_all_nans(self):
  2585. format_for_string_of_nans = tools._guess_datetime_format_for_array(
  2586. np.array([np.nan, np.nan, np.nan], dtype="O")
  2587. )
  2588. assert format_for_string_of_nans is None
  2589. class TestToDatetimeInferFormat:
  2590. @pytest.mark.parametrize(
  2591. "test_format", ["%m-%d-%Y", "%m/%d/%Y %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%f"]
  2592. )
  2593. def test_to_datetime_infer_datetime_format_consistent_format(
  2594. self, cache, test_format
  2595. ):
  2596. ser = Series(date_range("20000101", periods=50, freq="h"))
  2597. s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format))
  2598. with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache)
  2599. without_format = to_datetime(s_as_dt_strings, cache=cache)
  2600. # Whether the format is explicitly passed, or
  2601. # it is inferred, the results should all be the same
  2602. tm.assert_series_equal(with_format, without_format)
  2603. def test_to_datetime_inconsistent_format(self, cache):
  2604. data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
  2605. ser = Series(np.array(data))
  2606. msg = (
  2607. r'^time data "01-02-2011 00:00:00" doesn\'t match format '
  2608. rf'"%m/%d/%Y %H:%M:%S", at position 1. {PARSING_ERR_MSG}$'
  2609. )
  2610. with pytest.raises(ValueError, match=msg):
  2611. to_datetime(ser, cache=cache)
  2612. def test_to_datetime_consistent_format(self, cache):
  2613. data = ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"]
  2614. ser = Series(np.array(data))
  2615. result = to_datetime(ser, cache=cache)
  2616. expected = Series(
  2617. ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[ns]"
  2618. )
  2619. tm.assert_series_equal(result, expected)
  2620. def test_to_datetime_series_with_nans(self, cache):
  2621. ser = Series(
  2622. np.array(
  2623. ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan],
  2624. dtype=object,
  2625. )
  2626. )
  2627. result = to_datetime(ser, cache=cache)
  2628. expected = Series(
  2629. ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[ns]"
  2630. )
  2631. tm.assert_series_equal(result, expected)
  2632. def test_to_datetime_series_start_with_nans(self, cache):
  2633. ser = Series(
  2634. np.array(
  2635. [
  2636. np.nan,
  2637. np.nan,
  2638. "01/01/2011 00:00:00",
  2639. "01/02/2011 00:00:00",
  2640. "01/03/2011 00:00:00",
  2641. ],
  2642. dtype=object,
  2643. )
  2644. )
  2645. result = to_datetime(ser, cache=cache)
  2646. expected = Series(
  2647. [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[ns]"
  2648. )
  2649. tm.assert_series_equal(result, expected)
  2650. @pytest.mark.parametrize(
  2651. "tz_name, offset",
  2652. [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)],
  2653. )
  2654. def test_infer_datetime_format_tz_name(self, tz_name, offset):
  2655. # GH 33133
  2656. ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
  2657. result = to_datetime(ser)
  2658. tz = timezone(timedelta(minutes=offset))
  2659. expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)])
  2660. tm.assert_series_equal(result, expected)
  2661. @pytest.mark.parametrize(
  2662. "ts,zero_tz",
  2663. [
  2664. ("2019-02-02 08:07:13", "Z"),
  2665. ("2019-02-02 08:07:13", ""),
  2666. ("2019-02-02 08:07:13.012345", "Z"),
  2667. ("2019-02-02 08:07:13.012345", ""),
  2668. ],
  2669. )
  2670. def test_infer_datetime_format_zero_tz(self, ts, zero_tz):
  2671. # GH 41047
  2672. ser = Series([ts + zero_tz])
  2673. result = to_datetime(ser)
  2674. tz = pytz.utc if zero_tz == "Z" else None
  2675. expected = Series([Timestamp(ts, tz=tz)])
  2676. tm.assert_series_equal(result, expected)
  2677. @pytest.mark.parametrize("format", [None, "%Y-%m-%d"])
  2678. def test_to_datetime_iso8601_noleading_0s(self, cache, format):
  2679. # GH 11871
  2680. ser = Series(["2014-1-1", "2014-2-2", "2015-3-3"])
  2681. expected = Series(
  2682. [
  2683. Timestamp("2014-01-01"),
  2684. Timestamp("2014-02-02"),
  2685. Timestamp("2015-03-03"),
  2686. ]
  2687. )
  2688. result = to_datetime(ser, format=format, cache=cache)
  2689. tm.assert_series_equal(result, expected)
  2690. def test_parse_dates_infer_datetime_format_warning(self):
  2691. # GH 49024
  2692. with tm.assert_produces_warning(
  2693. UserWarning,
  2694. match="The argument 'infer_datetime_format' is deprecated",
  2695. ):
  2696. to_datetime(["10-10-2000"], infer_datetime_format=True)
  2697. class TestDaysInMonth:
  2698. # tests for issue #10154
  2699. @pytest.mark.parametrize(
  2700. "arg, format",
  2701. [
  2702. ["2015-02-29", None],
  2703. ["2015-02-29", "%Y-%m-%d"],
  2704. ["2015-02-32", "%Y-%m-%d"],
  2705. ["2015-04-31", "%Y-%m-%d"],
  2706. ],
  2707. )
  2708. def test_day_not_in_month_coerce(self, cache, arg, format):
  2709. assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
  2710. def test_day_not_in_month_raise(self, cache):
  2711. if PY314:
  2712. msg = "day 29 must be in range 1..28 for month 2 in year 2015: 2015-02-29"
  2713. else:
  2714. msg = "day is out of range for month: 2015-02-29"
  2715. with pytest.raises(ValueError, match=msg):
  2716. to_datetime("2015-02-29", errors="raise", cache=cache)
  2717. @pytest.mark.parametrize(
  2718. "arg, format, msg",
  2719. [
  2720. (
  2721. "2015-02-29",
  2722. "%Y-%m-%d",
  2723. f"^{DAY_IS_OUT_OF_RANGE}. {PARSING_ERR_MSG}$",
  2724. ),
  2725. (
  2726. "2015-29-02",
  2727. "%Y-%d-%m",
  2728. f"^{DAY_IS_OUT_OF_RANGE}. {PARSING_ERR_MSG}$",
  2729. ),
  2730. (
  2731. "2015-02-32",
  2732. "%Y-%m-%d",
  2733. '^unconverted data remains when parsing with format "%Y-%m-%d": "2", '
  2734. f"at position 0. {PARSING_ERR_MSG}$",
  2735. ),
  2736. (
  2737. "2015-32-02",
  2738. "%Y-%d-%m",
  2739. '^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", '
  2740. f"at position 0. {PARSING_ERR_MSG}$",
  2741. ),
  2742. (
  2743. "2015-04-31",
  2744. "%Y-%m-%d",
  2745. f"^{DAY_IS_OUT_OF_RANGE}. {PARSING_ERR_MSG}$",
  2746. ),
  2747. (
  2748. "2015-31-04",
  2749. "%Y-%d-%m",
  2750. f"^{DAY_IS_OUT_OF_RANGE}. {PARSING_ERR_MSG}$",
  2751. ),
  2752. ],
  2753. )
  2754. def test_day_not_in_month_raise_value(self, cache, arg, format, msg):
  2755. # https://github.com/pandas-dev/pandas/issues/50462
  2756. with pytest.raises(ValueError, match=msg):
  2757. to_datetime(arg, errors="raise", format=format, cache=cache)
  2758. @pytest.mark.parametrize(
  2759. "expected, format",
  2760. [
  2761. ["2015-02-29", None],
  2762. ["2015-02-29", "%Y-%m-%d"],
  2763. ["2015-02-29", "%Y-%m-%d"],
  2764. ["2015-04-31", "%Y-%m-%d"],
  2765. ],
  2766. )
  2767. def test_day_not_in_month_ignore(self, cache, expected, format):
  2768. result = to_datetime(expected, errors="ignore", format=format, cache=cache)
  2769. assert result == expected
  2770. class TestDatetimeParsingWrappers:
  2771. @pytest.mark.parametrize(
  2772. "date_str, expected",
  2773. [
  2774. ("2011-01-01", datetime(2011, 1, 1)),
  2775. ("2Q2005", datetime(2005, 4, 1)),
  2776. ("2Q05", datetime(2005, 4, 1)),
  2777. ("2005Q1", datetime(2005, 1, 1)),
  2778. ("05Q1", datetime(2005, 1, 1)),
  2779. ("2011Q3", datetime(2011, 7, 1)),
  2780. ("11Q3", datetime(2011, 7, 1)),
  2781. ("3Q2011", datetime(2011, 7, 1)),
  2782. ("3Q11", datetime(2011, 7, 1)),
  2783. # quarterly without space
  2784. ("2000Q4", datetime(2000, 10, 1)),
  2785. ("00Q4", datetime(2000, 10, 1)),
  2786. ("4Q2000", datetime(2000, 10, 1)),
  2787. ("4Q00", datetime(2000, 10, 1)),
  2788. ("2000q4", datetime(2000, 10, 1)),
  2789. ("2000-Q4", datetime(2000, 10, 1)),
  2790. ("00-Q4", datetime(2000, 10, 1)),
  2791. ("4Q-2000", datetime(2000, 10, 1)),
  2792. ("4Q-00", datetime(2000, 10, 1)),
  2793. ("00q4", datetime(2000, 10, 1)),
  2794. ("2005", datetime(2005, 1, 1)),
  2795. ("2005-11", datetime(2005, 11, 1)),
  2796. ("2005 11", datetime(2005, 11, 1)),
  2797. ("11-2005", datetime(2005, 11, 1)),
  2798. ("11 2005", datetime(2005, 11, 1)),
  2799. ("200511", datetime(2020, 5, 11)),
  2800. ("20051109", datetime(2005, 11, 9)),
  2801. ("20051109 10:15", datetime(2005, 11, 9, 10, 15)),
  2802. ("20051109 08H", datetime(2005, 11, 9, 8, 0)),
  2803. ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)),
  2804. ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)),
  2805. ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)),
  2806. ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)),
  2807. ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)),
  2808. ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)),
  2809. ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)),
  2810. ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)),
  2811. ("Thu Sep 25 2003", datetime(2003, 9, 25)),
  2812. ("Sep 25 2003", datetime(2003, 9, 25)),
  2813. ("January 1 2014", datetime(2014, 1, 1)),
  2814. # GH#10537
  2815. ("2014-06", datetime(2014, 6, 1)),
  2816. ("06-2014", datetime(2014, 6, 1)),
  2817. ("2014-6", datetime(2014, 6, 1)),
  2818. ("6-2014", datetime(2014, 6, 1)),
  2819. ("20010101 12", datetime(2001, 1, 1, 12)),
  2820. ("20010101 1234", datetime(2001, 1, 1, 12, 34)),
  2821. ("20010101 123456", datetime(2001, 1, 1, 12, 34, 56)),
  2822. ],
  2823. )
  2824. def test_parsers(self, date_str, expected, cache):
  2825. # dateutil >= 2.5.0 defaults to yearfirst=True
  2826. # https://github.com/dateutil/dateutil/issues/217
  2827. yearfirst = True
  2828. result1, _ = parsing.parse_datetime_string_with_reso(
  2829. date_str, yearfirst=yearfirst
  2830. )
  2831. result2 = to_datetime(date_str, yearfirst=yearfirst)
  2832. result3 = to_datetime([date_str], yearfirst=yearfirst)
  2833. # result5 is used below
  2834. result4 = to_datetime(
  2835. np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
  2836. )
  2837. result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
  2838. # result7 is used below
  2839. result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
  2840. result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)
  2841. for res in [result1, result2]:
  2842. assert res == expected
  2843. for res in [result3, result4, result6, result8, result9]:
  2844. exp = DatetimeIndex([Timestamp(expected)])
  2845. tm.assert_index_equal(res, exp)
  2846. # these really need to have yearfirst, but we don't support
  2847. if not yearfirst:
  2848. result5 = Timestamp(date_str)
  2849. assert result5 == expected
  2850. result7 = date_range(date_str, freq="S", periods=1, yearfirst=yearfirst)
  2851. assert result7 == expected
  2852. def test_na_values_with_cache(
  2853. self, cache, unique_nulls_fixture, unique_nulls_fixture2
  2854. ):
  2855. # GH22305
  2856. expected = Index([NaT, NaT], dtype="datetime64[ns]")
  2857. result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], cache=cache)
  2858. tm.assert_index_equal(result, expected)
  2859. def test_parsers_nat(self):
  2860. # Test that each of several string-accepting methods return pd.NaT
  2861. result1, _ = parsing.parse_datetime_string_with_reso("NaT")
  2862. result2 = to_datetime("NaT")
  2863. result3 = Timestamp("NaT")
  2864. result4 = DatetimeIndex(["NaT"])[0]
  2865. assert result1 is NaT
  2866. assert result2 is NaT
  2867. assert result3 is NaT
  2868. assert result4 is NaT
  2869. @pytest.mark.parametrize(
  2870. "date_str, dayfirst, yearfirst, expected",
  2871. [
  2872. ("10-11-12", False, False, datetime(2012, 10, 11)),
  2873. ("10-11-12", True, False, datetime(2012, 11, 10)),
  2874. ("10-11-12", False, True, datetime(2010, 11, 12)),
  2875. ("10-11-12", True, True, datetime(2010, 12, 11)),
  2876. ("20/12/21", False, False, datetime(2021, 12, 20)),
  2877. ("20/12/21", True, False, datetime(2021, 12, 20)),
  2878. ("20/12/21", False, True, datetime(2020, 12, 21)),
  2879. ("20/12/21", True, True, datetime(2020, 12, 21)),
  2880. ],
  2881. )
  2882. def test_parsers_dayfirst_yearfirst(
  2883. self, cache, date_str, dayfirst, yearfirst, expected
  2884. ):
  2885. # OK
  2886. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  2887. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00
  2888. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  2889. # OK
  2890. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2891. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2892. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2893. # bug fix in 2.5.2
  2894. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
  2895. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  2896. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  2897. # OK
  2898. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2899. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2900. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2901. # OK
  2902. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2903. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2904. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2905. # OK
  2906. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2907. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2908. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2909. # revert of bug in 2.5.2
  2910. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  2911. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12
  2912. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  2913. # OK
  2914. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2915. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2916. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2917. # str : dayfirst, yearfirst, expected
  2918. # compare with dateutil result
  2919. dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst)
  2920. assert dateutil_result == expected
  2921. result1, _ = parsing.parse_datetime_string_with_reso(
  2922. date_str, dayfirst=dayfirst, yearfirst=yearfirst
  2923. )
  2924. # we don't support dayfirst/yearfirst here:
  2925. if not dayfirst and not yearfirst:
  2926. result2 = Timestamp(date_str)
  2927. assert result2 == expected
  2928. result3 = to_datetime(
  2929. date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
  2930. )
  2931. result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0]
  2932. assert result1 == expected
  2933. assert result3 == expected
  2934. assert result4 == expected
  2935. @pytest.mark.parametrize(
  2936. "date_str, exp_def",
  2937. [["10:15", datetime(1, 1, 1, 10, 15)], ["9:05", datetime(1, 1, 1, 9, 5)]],
  2938. )
  2939. def test_parsers_timestring(self, date_str, exp_def):
  2940. # must be the same as dateutil result
  2941. exp_now = parse(date_str)
  2942. result1, _ = parsing.parse_datetime_string_with_reso(date_str)
  2943. result2 = to_datetime(date_str)
  2944. result3 = to_datetime([date_str])
  2945. result4 = Timestamp(date_str)
  2946. result5 = DatetimeIndex([date_str])[0]
  2947. # parse time string return time string based on default date
  2948. # others are not, and can't be changed because it is used in
  2949. # time series plot
  2950. assert result1 == exp_def
  2951. assert result2 == exp_now
  2952. assert result3 == exp_now
  2953. assert result4 == exp_now
  2954. assert result5 == exp_now
  2955. @pytest.mark.parametrize(
  2956. "dt_string, tz, dt_string_repr",
  2957. [
  2958. (
  2959. "2013-01-01 05:45+0545",
  2960. timezone(timedelta(minutes=345)),
  2961. "Timestamp('2013-01-01 05:45:00+0545', tz='UTC+05:45')",
  2962. ),
  2963. (
  2964. "2013-01-01 05:30+0530",
  2965. timezone(timedelta(minutes=330)),
  2966. "Timestamp('2013-01-01 05:30:00+0530', tz='UTC+05:30')",
  2967. ),
  2968. ],
  2969. )
  2970. def test_parsers_timezone_minute_offsets_roundtrip(
  2971. self, cache, dt_string, tz, dt_string_repr
  2972. ):
  2973. # GH11708
  2974. base = to_datetime("2013-01-01 00:00:00", cache=cache)
  2975. base = base.tz_localize("UTC").tz_convert(tz)
  2976. dt_time = to_datetime(dt_string, cache=cache)
  2977. assert base == dt_time
  2978. assert dt_string_repr == repr(dt_time)
  2979. @pytest.fixture(params=["D", "s", "ms", "us", "ns"])
  2980. def units(request):
  2981. """Day and some time units.
  2982. * D
  2983. * s
  2984. * ms
  2985. * us
  2986. * ns
  2987. """
  2988. return request.param
  2989. @pytest.fixture
  2990. def epoch_1960():
  2991. """Timestamp at 1960-01-01."""
  2992. return Timestamp("1960-01-01")
  2993. @pytest.fixture
  2994. def units_from_epochs():
  2995. return list(range(5))
  2996. @pytest.fixture(params=["timestamp", "pydatetime", "datetime64", "str_1960"])
  2997. def epochs(epoch_1960, request):
  2998. """Timestamp at 1960-01-01 in various forms.
  2999. * Timestamp
  3000. * datetime.datetime
  3001. * numpy.datetime64
  3002. * str
  3003. """
  3004. assert request.param in {"timestamp", "pydatetime", "datetime64", "str_1960"}
  3005. if request.param == "timestamp":
  3006. return epoch_1960
  3007. elif request.param == "pydatetime":
  3008. return epoch_1960.to_pydatetime()
  3009. elif request.param == "datetime64":
  3010. return epoch_1960.to_datetime64()
  3011. else:
  3012. return str(epoch_1960)
  3013. @pytest.fixture
  3014. def julian_dates():
  3015. return date_range("2014-1-1", periods=10).to_julian_date().values
  3016. class TestOrigin:
  3017. def test_origin_and_unit(self):
  3018. # GH#42624
  3019. ts = to_datetime(1, unit="s", origin=1)
  3020. expected = Timestamp("1970-01-01 00:00:02")
  3021. assert ts == expected
  3022. ts = to_datetime(1, unit="s", origin=1_000_000_000)
  3023. expected = Timestamp("2001-09-09 01:46:41")
  3024. assert ts == expected
  3025. def test_julian(self, julian_dates):
  3026. # gh-11276, gh-11745
  3027. # for origin as julian
  3028. result = Series(to_datetime(julian_dates, unit="D", origin="julian"))
  3029. expected = Series(
  3030. to_datetime(julian_dates - Timestamp(0).to_julian_date(), unit="D")
  3031. )
  3032. tm.assert_series_equal(result, expected)
  3033. def test_unix(self):
  3034. result = Series(to_datetime([0, 1, 2], unit="D", origin="unix"))
  3035. expected = Series(
  3036. [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")],
  3037. dtype="M8[ns]",
  3038. )
  3039. tm.assert_series_equal(result, expected)
  3040. def test_julian_round_trip(self):
  3041. result = to_datetime(2456658, origin="julian", unit="D")
  3042. assert result.to_julian_date() == 2456658
  3043. # out-of-bounds
  3044. msg = "1 is Out of Bounds for origin='julian'"
  3045. with pytest.raises(ValueError, match=msg):
  3046. to_datetime(1, origin="julian", unit="D")
  3047. def test_invalid_unit(self, units, julian_dates):
  3048. # checking for invalid combination of origin='julian' and unit != D
  3049. if units != "D":
  3050. msg = "unit must be 'D' for origin='julian'"
  3051. with pytest.raises(ValueError, match=msg):
  3052. to_datetime(julian_dates, unit=units, origin="julian")
  3053. @pytest.mark.parametrize("unit", ["ns", "D"])
  3054. def test_invalid_origin(self, unit):
  3055. # need to have a numeric specified
  3056. msg = "it must be numeric with a unit specified"
  3057. with pytest.raises(ValueError, match=msg):
  3058. to_datetime("2005-01-01", origin="1960-01-01", unit=unit)
  3059. @pytest.mark.parametrize(
  3060. "epochs",
  3061. [
  3062. Timestamp(1960, 1, 1),
  3063. datetime(1960, 1, 1),
  3064. "1960-01-01",
  3065. np.datetime64("1960-01-01"),
  3066. ],
  3067. )
  3068. def test_epoch(self, units, epochs):
  3069. epoch_1960 = Timestamp(1960, 1, 1)
  3070. units_from_epochs = np.arange(5, dtype=np.int64)
  3071. expected = Series(
  3072. [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
  3073. )
  3074. result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
  3075. tm.assert_series_equal(result, expected)
  3076. @pytest.mark.parametrize(
  3077. "origin, exc",
  3078. [
  3079. ("random_string", ValueError),
  3080. ("epoch", ValueError),
  3081. ("13-24-1990", ValueError),
  3082. (datetime(1, 1, 1), OutOfBoundsDatetime),
  3083. ],
  3084. )
  3085. def test_invalid_origins(self, origin, exc, units, units_from_epochs):
  3086. msg = "|".join(
  3087. [
  3088. f"origin {origin} is Out of Bounds",
  3089. f"origin {origin} cannot be converted to a Timestamp",
  3090. "Cannot cast .* to unit='ns' without overflow",
  3091. ]
  3092. )
  3093. with pytest.raises(exc, match=msg):
  3094. to_datetime(units_from_epochs, unit=units, origin=origin)
  3095. def test_invalid_origins_tzinfo(self):
  3096. # GH16842
  3097. with pytest.raises(ValueError, match="must be tz-naive"):
  3098. to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=pytz.utc))
  3099. def test_incorrect_value_exception(self):
  3100. # GH47495
  3101. msg = (
  3102. "Unknown datetime string format, unable to parse: yesterday, at position 1"
  3103. )
  3104. with pytest.raises(ValueError, match=msg):
  3105. to_datetime(["today", "yesterday"])
  3106. @pytest.mark.parametrize(
  3107. "format, warning",
  3108. [
  3109. (None, UserWarning),
  3110. ("%Y-%m-%d %H:%M:%S", None),
  3111. ("%Y-%d-%m %H:%M:%S", None),
  3112. ],
  3113. )
  3114. def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
  3115. # see gh-23830
  3116. msg = r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0"
  3117. with pytest.raises(OutOfBoundsDatetime, match=msg):
  3118. to_datetime("2417-10-10 00:00:00", format=format)
  3119. @pytest.mark.parametrize(
  3120. "arg, origin, expected_str",
  3121. [
  3122. [200 * 365, "unix", "2169-11-13 00:00:00"],
  3123. [200 * 365, "1870-01-01", "2069-11-13 00:00:00"],
  3124. [300 * 365, "1870-01-01", "2169-10-20 00:00:00"],
  3125. ],
  3126. )
  3127. def test_processing_order(self, arg, origin, expected_str):
  3128. # make sure we handle out-of-bounds *before*
  3129. # constructing the dates
  3130. result = to_datetime(arg, unit="D", origin=origin)
  3131. expected = Timestamp(expected_str)
  3132. assert result == expected
  3133. result = to_datetime(200 * 365, unit="D", origin="1870-01-01")
  3134. expected = Timestamp("2069-11-13 00:00:00")
  3135. assert result == expected
  3136. result = to_datetime(300 * 365, unit="D", origin="1870-01-01")
  3137. expected = Timestamp("2169-10-20 00:00:00")
  3138. assert result == expected
  3139. @pytest.mark.parametrize(
  3140. "offset,utc,exp",
  3141. [
  3142. ["Z", True, "2019-01-01T00:00:00.000Z"],
  3143. ["Z", None, "2019-01-01T00:00:00.000Z"],
  3144. ["-01:00", True, "2019-01-01T01:00:00.000Z"],
  3145. ["-01:00", None, "2019-01-01T00:00:00.000-01:00"],
  3146. ],
  3147. )
  3148. def test_arg_tz_ns_unit(self, offset, utc, exp):
  3149. # GH 25546
  3150. arg = "2019-01-01T00:00:00.000" + offset
  3151. result = to_datetime([arg], unit="ns", utc=utc)
  3152. expected = to_datetime([exp]).as_unit("ns")
  3153. tm.assert_index_equal(result, expected)
  3154. class TestShouldCache:
  3155. @pytest.mark.parametrize(
  3156. "listlike,do_caching",
  3157. [
  3158. ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False),
  3159. ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True),
  3160. ],
  3161. )
  3162. def test_should_cache(self, listlike, do_caching):
  3163. assert (
  3164. tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7)
  3165. == do_caching
  3166. )
  3167. @pytest.mark.parametrize(
  3168. "unique_share,check_count, err_message",
  3169. [
  3170. (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"),
  3171. (10, 2, r"unique_share must be in next bounds: \(0; 1\)"),
  3172. ],
  3173. )
  3174. def test_should_cache_errors(self, unique_share, check_count, err_message):
  3175. arg = [5] * 10
  3176. with pytest.raises(AssertionError, match=err_message):
  3177. tools.should_cache(arg, unique_share, check_count)
  3178. @pytest.mark.parametrize(
  3179. "listlike",
  3180. [
  3181. (deque([Timestamp("2010-06-02 09:30:00")] * 51)),
  3182. ([Timestamp("2010-06-02 09:30:00")] * 51),
  3183. (tuple([Timestamp("2010-06-02 09:30:00")] * 51)),
  3184. ],
  3185. )
  3186. def test_no_slicing_errors_in_should_cache(self, listlike):
  3187. # GH#29403
  3188. assert tools.should_cache(listlike) is True
  3189. def test_nullable_integer_to_datetime():
  3190. # Test for #30050
  3191. ser = Series([1, 2, None, 2**61, None])
  3192. ser = ser.astype("Int64")
  3193. ser_copy = ser.copy()
  3194. res = to_datetime(ser, unit="ns")
  3195. expected = Series(
  3196. [
  3197. np.datetime64("1970-01-01 00:00:00.000000001"),
  3198. np.datetime64("1970-01-01 00:00:00.000000002"),
  3199. np.datetime64("NaT"),
  3200. np.datetime64("2043-01-25 23:56:49.213693952"),
  3201. np.datetime64("NaT"),
  3202. ]
  3203. )
  3204. tm.assert_series_equal(res, expected)
  3205. # Check that ser isn't mutated
  3206. tm.assert_series_equal(ser, ser_copy)
  3207. @pytest.mark.parametrize("klass", [np.array, list])
  3208. def test_na_to_datetime(nulls_fixture, klass):
  3209. if isinstance(nulls_fixture, Decimal):
  3210. with pytest.raises(TypeError, match="not convertible to datetime"):
  3211. to_datetime(klass([nulls_fixture]))
  3212. else:
  3213. result = to_datetime(klass([nulls_fixture]))
  3214. assert result[0] is NaT
  3215. @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  3216. @pytest.mark.parametrize(
  3217. "args, format",
  3218. [
  3219. (["03/24/2016", "03/25/2016", ""], "%m/%d/%Y"),
  3220. (["2016-03-24", "2016-03-25", ""], "%Y-%m-%d"),
  3221. ],
  3222. ids=["non-ISO8601", "ISO8601"],
  3223. )
  3224. def test_empty_string_datetime(errors, args, format):
  3225. # GH13044, GH50251
  3226. td = Series(args)
  3227. # coerce empty string to pd.NaT
  3228. result = to_datetime(td, format=format, errors=errors)
  3229. expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
  3230. tm.assert_series_equal(expected, result)
  3231. def test_empty_string_datetime_coerce__unit():
  3232. # GH13044
  3233. # coerce empty string to pd.NaT
  3234. result = to_datetime([1, ""], unit="s", errors="coerce")
  3235. expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]")
  3236. tm.assert_index_equal(expected, result)
  3237. # verify that no exception is raised even when errors='raise' is set
  3238. result = to_datetime([1, ""], unit="s", errors="raise")
  3239. tm.assert_index_equal(expected, result)
  3240. @pytest.mark.parametrize("cache", [True, False])
  3241. def test_to_datetime_monotonic_increasing_index(cache):
  3242. # GH28238
  3243. cstart = start_caching_at
  3244. times = date_range(Timestamp("1980"), periods=cstart, freq="YS")
  3245. times = times.to_frame(index=False, name="DT").sample(n=cstart, random_state=1)
  3246. times.index = times.index.to_series().astype(float) / 1000
  3247. result = to_datetime(times.iloc[:, 0], cache=cache)
  3248. expected = times.iloc[:, 0]
  3249. tm.assert_series_equal(result, expected)
  3250. @pytest.mark.parametrize(
  3251. "series_length",
  3252. [40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)],
  3253. )
  3254. def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
  3255. # GH#45319
  3256. ser = Series(
  3257. [datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
  3258. + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length),
  3259. dtype=object,
  3260. )
  3261. result1 = to_datetime(ser, errors="coerce", utc=True)
  3262. expected1 = Series(
  3263. [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length)
  3264. )
  3265. tm.assert_series_equal(result1, expected1)
  3266. result2 = to_datetime(ser, errors="ignore", utc=True)
  3267. expected2 = Series(
  3268. [datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
  3269. + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
  3270. )
  3271. tm.assert_series_equal(result2, expected2)
  3272. with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"):
  3273. to_datetime(ser, errors="raise", utc=True)
  3274. def test_to_datetime_format_f_parse_nanos():
  3275. # GH 48767
  3276. timestamp = "15/02/2020 02:03:04.123456789"
  3277. timestamp_format = "%d/%m/%Y %H:%M:%S.%f"
  3278. result = to_datetime(timestamp, format=timestamp_format)
  3279. expected = Timestamp(
  3280. year=2020,
  3281. month=2,
  3282. day=15,
  3283. hour=2,
  3284. minute=3,
  3285. second=4,
  3286. microsecond=123456,
  3287. nanosecond=789,
  3288. )
  3289. assert result == expected
  3290. def test_to_datetime_mixed_iso8601():
  3291. # https://github.com/pandas-dev/pandas/issues/50411
  3292. result = to_datetime(["2020-01-01", "2020-01-01 05:00:00"], format="ISO8601")
  3293. expected = DatetimeIndex(["2020-01-01 00:00:00", "2020-01-01 05:00:00"])
  3294. tm.assert_index_equal(result, expected)
  3295. def test_to_datetime_mixed_other():
  3296. # https://github.com/pandas-dev/pandas/issues/50411
  3297. result = to_datetime(["01/11/2000", "12 January 2000"], format="mixed")
  3298. expected = DatetimeIndex(["2000-01-11", "2000-01-12"])
  3299. tm.assert_index_equal(result, expected)
  3300. @pytest.mark.parametrize("exact", [True, False])
  3301. @pytest.mark.parametrize("format", ["ISO8601", "mixed"])
  3302. def test_to_datetime_mixed_or_iso_exact(exact, format):
  3303. msg = "Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'"
  3304. with pytest.raises(ValueError, match=msg):
  3305. to_datetime(["2020-01-01"], exact=exact, format=format)
  3306. def test_to_datetime_mixed_not_necessarily_iso8601_raise():
  3307. # https://github.com/pandas-dev/pandas/issues/50411
  3308. with pytest.raises(
  3309. ValueError, match="Time data 01-01-2000 is not ISO8601 format, at position 1"
  3310. ):
  3311. to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601")
  3312. @pytest.mark.parametrize(
  3313. ("errors", "expected"),
  3314. [
  3315. ("coerce", DatetimeIndex(["2020-01-01 00:00:00", NaT])),
  3316. ("ignore", Index(["2020-01-01", "01-01-2000"], dtype="str")),
  3317. ],
  3318. )
  3319. def test_to_datetime_mixed_not_necessarily_iso8601_coerce(errors, expected):
  3320. # https://github.com/pandas-dev/pandas/issues/50411
  3321. result = to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601", errors=errors)
  3322. tm.assert_index_equal(result, expected)
  3323. def test_ignoring_unknown_tz_deprecated():
  3324. # GH#18702, GH#51476
  3325. dtstr = "2014 Jan 9 05:15 FAKE"
  3326. msg = 'un-recognized timezone "FAKE". Dropping unrecognized timezones is deprecated'
  3327. with tm.assert_produces_warning(FutureWarning, match=msg):
  3328. res = Timestamp(dtstr)
  3329. assert res == Timestamp(dtstr[:-5])
  3330. with tm.assert_produces_warning(FutureWarning):
  3331. res = to_datetime(dtstr)
  3332. assert res == to_datetime(dtstr[:-5])
  3333. with tm.assert_produces_warning(FutureWarning):
  3334. res = to_datetime([dtstr])
  3335. tm.assert_index_equal(res, to_datetime([dtstr[:-5]]))
  3336. def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
  3337. # GH 52425
  3338. pytest.importorskip("pyarrow")
  3339. ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]")
  3340. result = to_datetime(ser)
  3341. expected = Series([1, 2], dtype="datetime64[ns]")
  3342. tm.assert_series_equal(result, expected)
  3343. def test_to_datetime_with_empty_str_utc_false_format_mixed():
  3344. # GH 50887
  3345. vals = ["2020-01-01 00:00+00:00", ""]
  3346. result = to_datetime(vals, format="mixed")
  3347. expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[ns, UTC]")
  3348. tm.assert_index_equal(result, expected)
  3349. # Check that a couple of other similar paths work the same way
  3350. alt = to_datetime(vals)
  3351. tm.assert_index_equal(alt, expected)
  3352. alt2 = DatetimeIndex(vals)
  3353. tm.assert_index_equal(alt2, expected)
  3354. def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed():
  3355. # GH 50887
  3356. msg = "parsing datetimes with mixed time zones will raise an error"
  3357. with tm.assert_produces_warning(FutureWarning, match=msg):
  3358. to_datetime(
  3359. ["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed"
  3360. )
  3361. def test_to_datetime_mixed_tzs_mixed_types():
  3362. # GH#55793, GH#55693 mismatched tzs but one is str and other is
  3363. # datetime object
  3364. ts = Timestamp("2016-01-02 03:04:05", tz="US/Pacific")
  3365. dtstr = "2023-10-30 15:06+01"
  3366. arr = [ts, dtstr]
  3367. msg = (
  3368. "Mixed timezones detected. pass utc=True in to_datetime or tz='UTC' "
  3369. "in DatetimeIndex to convert to a common timezone"
  3370. )
  3371. with pytest.raises(ValueError, match=msg):
  3372. to_datetime(arr)
  3373. with pytest.raises(ValueError, match=msg):
  3374. to_datetime(arr, format="mixed")
  3375. with pytest.raises(ValueError, match=msg):
  3376. DatetimeIndex(arr)
  3377. def test_to_datetime_mixed_types_matching_tzs():
  3378. # GH#55793
  3379. dtstr = "2023-11-01 09:22:03-07:00"
  3380. ts = Timestamp(dtstr)
  3381. arr = [ts, dtstr]
  3382. res1 = to_datetime(arr)
  3383. res2 = to_datetime(arr[::-1])[::-1]
  3384. res3 = to_datetime(arr, format="mixed")
  3385. res4 = DatetimeIndex(arr)
  3386. expected = DatetimeIndex([ts, ts])
  3387. tm.assert_index_equal(res1, expected)
  3388. tm.assert_index_equal(res2, expected)
  3389. tm.assert_index_equal(res3, expected)
  3390. tm.assert_index_equal(res4, expected)
  # Module-level sample values used by the parametrize decorators below:
  # a string with a +00:00 offset and the Timestamp built from it.
  3391. dtstr = "2020-01-01 00:00+00:00"
  3392. ts = Timestamp(dtstr)
  3393. @pytest.mark.filterwarnings("ignore:Could not infer format:UserWarning")
  3394. @pytest.mark.parametrize(
  3395. "aware_val",
  3396. [dtstr, Timestamp(dtstr)],
  3397. ids=lambda x: type(x).__name__,
  3398. )
  3399. @pytest.mark.parametrize(
  3400. "naive_val",
  3401. [dtstr[:-6], ts.tz_localize(None), ts.date(), ts.asm8, ts.value, float(ts.value)],
  3402. ids=lambda x: type(x).__name__,
  3403. )
  3404. @pytest.mark.parametrize("naive_first", [True, False])
  3405. def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_first):
  # Builds a three-element list (one aware value, one naive value, one empty
  # string) in either order and checks what to_datetime and DatetimeIndex
  # raise or warn for it, with and without utc=True / format="mixed".
  3406. # GH#55793, GH#55693
  3407. # Empty string parses to NaT
  3408. vals = [aware_val, naive_val, ""]
  3409. vec = vals
  3410. if naive_first:
  3411. # alas, the behavior is order-dependent, so we test both ways
  3412. vec = [naive_val, aware_val, ""]
  3413. # both_strs-> paths that were previously already deprecated with warning
  3414. # issued in _array_to_datetime_object
  3415. both_strs = isinstance(aware_val, str) and isinstance(naive_val, str)
  # has_numeric is True for the int / float naive values of the parametrization
  3416. has_numeric = isinstance(naive_val, (int, float))
  3417. depr_msg = "In a future version of pandas, parsing datetimes with mixed time zones"
  # first element of vec that is not the empty string; its type selects the
  # branch taken below
  3418. first_non_null = next(x for x in vec if x != "")
  3419. # if first_non_null is not a string, _guess_datetime_format_for_array
  3420. # doesn't guess a format so we don't go through array_strptime
  3421. if not isinstance(first_non_null, str):
  # NOTE(review): the next comment appears to contradict the one above (which
  # says this branch does NOT go through array_strptime) - confirm which holds.
  3422. # that case goes through array_strptime which has different behavior
  3423. msg = "Cannot mix tz-aware with tz-naive values"
  3424. if naive_first and isinstance(aware_val, Timestamp):
  3425. if isinstance(naive_val, Timestamp):
  3426. msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
  3427. with pytest.raises(ValueError, match=msg):
  3428. to_datetime(vec)
  3429. else:
  3430. with pytest.raises(ValueError, match=msg):
  3431. to_datetime(vec)
  3432. # No warning/error with utc=True
  3433. to_datetime(vec, utc=True)
  # aware value listed before a numeric naive value
  3434. elif has_numeric and vec.index(aware_val) < vec.index(naive_val):
  3435. msg = "time data .* doesn't match format"
  3436. with pytest.raises(ValueError, match=msg):
  3437. to_datetime(vec)
  3438. with pytest.raises(ValueError, match=msg):
  3439. to_datetime(vec, utc=True)
  # two strings, aware one first
  3440. elif both_strs and vec.index(aware_val) < vec.index(naive_val):
  3441. msg = r"time data \"2020-01-01 00:00\" doesn't match format"
  3442. with pytest.raises(ValueError, match=msg):
  3443. to_datetime(vec)
  3444. with pytest.raises(ValueError, match=msg):
  3445. to_datetime(vec, utc=True)
  # two strings, naive one first
  3446. elif both_strs and vec.index(naive_val) < vec.index(aware_val):
  3447. msg = "unconverted data remains when parsing with format"
  3448. with pytest.raises(ValueError, match=msg):
  3449. to_datetime(vec)
  3450. with pytest.raises(ValueError, match=msg):
  3451. to_datetime(vec, utc=True)
  3452. else:
  3453. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  3454. to_datetime(vec)
  3455. # No warning/error with utc=True
  3456. to_datetime(vec, utc=True)
  # Second half: the same vector through format="mixed" and through the
  # DatetimeIndex constructor.
  3457. if both_strs:
  3458. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  3459. to_datetime(vec, format="mixed")
  3460. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  3461. msg = "DatetimeIndex has mixed timezones"
  3462. with pytest.raises(TypeError, match=msg):
  3463. DatetimeIndex(vec)
  3464. else:
  3465. msg = "Cannot mix tz-aware with tz-naive values"
  3466. if naive_first and isinstance(aware_val, Timestamp):
  3467. if isinstance(naive_val, Timestamp):
  3468. msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
  3469. with pytest.raises(ValueError, match=msg):
  3470. to_datetime(vec, format="mixed")
  3471. with pytest.raises(ValueError, match=msg):
  3472. DatetimeIndex(vec)
  3473. else:
  3474. with pytest.raises(ValueError, match=msg):
  3475. to_datetime(vec, format="mixed")
  3476. with pytest.raises(ValueError, match=msg):
  3477. DatetimeIndex(vec)