test_datetimes.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
  1. """
  2. Tests for DatetimeArray
  3. """
  4. from __future__ import annotations
  5. from datetime import timedelta
  6. import operator
  7. try:
  8. from zoneinfo import ZoneInfo
  9. except ImportError:
  10. # Cannot assign to a type
  11. ZoneInfo = None # type: ignore[misc, assignment]
  12. import numpy as np
  13. import pytest
  14. from pandas._libs.tslibs import tz_compare
  15. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  16. import pandas as pd
  17. import pandas._testing as tm
  18. from pandas.core.arrays import (
  19. DatetimeArray,
  20. TimedeltaArray,
  21. )
  22. class TestNonNano:
  23. @pytest.fixture(params=["s", "ms", "us"])
  24. def unit(self, request):
  25. """Fixture returning parametrized time units"""
  26. return request.param
  27. @pytest.fixture
  28. def dtype(self, unit, tz_naive_fixture):
  29. tz = tz_naive_fixture
  30. if tz is None:
  31. return np.dtype(f"datetime64[{unit}]")
  32. else:
  33. return DatetimeTZDtype(unit=unit, tz=tz)
  34. @pytest.fixture
  35. def dta_dti(self, unit, dtype):
  36. tz = getattr(dtype, "tz", None)
  37. dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz)
  38. if tz is None:
  39. arr = np.asarray(dti).astype(f"M8[{unit}]")
  40. else:
  41. arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype(
  42. f"M8[{unit}]"
  43. )
  44. dta = DatetimeArray._simple_new(arr, dtype=dtype)
  45. return dta, dti
  46. @pytest.fixture
  47. def dta(self, dta_dti):
  48. dta, dti = dta_dti
  49. return dta
  50. def test_non_nano(self, unit, dtype):
  51. arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
  52. dta = DatetimeArray._simple_new(arr, dtype=dtype)
  53. assert dta.dtype == dtype
  54. assert dta[0].unit == unit
  55. assert tz_compare(dta.tz, dta[0].tz)
  56. assert (dta[0] == dta[:1]).all()
  57. @pytest.mark.parametrize(
  58. "field", DatetimeArray._field_ops + DatetimeArray._bool_ops
  59. )
  60. def test_fields(self, unit, field, dtype, dta_dti):
  61. dta, dti = dta_dti
  62. assert (dti == dta).all()
  63. res = getattr(dta, field)
  64. expected = getattr(dti._data, field)
  65. tm.assert_numpy_array_equal(res, expected)
  66. def test_normalize(self, unit):
  67. dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D")
  68. arr = np.asarray(dti).astype(f"M8[{unit}]")
  69. dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
  70. assert not dta.is_normalized
  71. # TODO: simplify once we can just .astype to other unit
  72. exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]")
  73. expected = DatetimeArray._simple_new(exp, dtype=exp.dtype)
  74. res = dta.normalize()
  75. tm.assert_extension_array_equal(res, expected)
  76. def test_simple_new_requires_match(self, unit):
  77. arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
  78. dtype = DatetimeTZDtype(unit, "UTC")
  79. dta = DatetimeArray._simple_new(arr, dtype=dtype)
  80. assert dta.dtype == dtype
  81. wrong = DatetimeTZDtype("ns", "UTC")
  82. with pytest.raises(AssertionError, match=""):
  83. DatetimeArray._simple_new(arr, dtype=wrong)
  84. def test_std_non_nano(self, unit):
  85. dti = pd.date_range("2016-01-01", periods=55, freq="D")
  86. arr = np.asarray(dti).astype(f"M8[{unit}]")
  87. dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
  88. # we should match the nano-reso std, but floored to our reso.
  89. res = dta.std()
  90. assert res._creso == dta._creso
  91. assert res == dti.std().floor(unit)
  92. @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning")
  93. def test_to_period(self, dta_dti):
  94. dta, dti = dta_dti
  95. result = dta.to_period("D")
  96. expected = dti._data.to_period("D")
  97. tm.assert_extension_array_equal(result, expected)
  98. def test_iter(self, dta):
  99. res = next(iter(dta))
  100. expected = dta[0]
  101. assert type(res) is pd.Timestamp
  102. assert res._value == expected._value
  103. assert res._creso == expected._creso
  104. assert res == expected
  105. def test_astype_object(self, dta):
  106. result = dta.astype(object)
  107. assert all(x._creso == dta._creso for x in result)
  108. assert all(x == y for x, y in zip(result, dta))
  109. def test_to_pydatetime(self, dta_dti):
  110. dta, dti = dta_dti
  111. result = dta.to_pydatetime()
  112. expected = dti.to_pydatetime()
  113. tm.assert_numpy_array_equal(result, expected)
  114. @pytest.mark.parametrize("meth", ["time", "timetz", "date"])
  115. def test_time_date(self, dta_dti, meth):
  116. dta, dti = dta_dti
  117. result = getattr(dta, meth)
  118. expected = getattr(dti, meth)
  119. tm.assert_numpy_array_equal(result, expected)
  120. def test_format_native_types(self, unit, dtype, dta_dti):
  121. # In this case we should get the same formatted values with our nano
  122. # version dti._data as we do with the non-nano dta
  123. dta, dti = dta_dti
  124. res = dta._format_native_types()
  125. exp = dti._data._format_native_types()
  126. tm.assert_numpy_array_equal(res, exp)
  127. def test_repr(self, dta_dti, unit):
  128. dta, dti = dta_dti
  129. assert repr(dta) == repr(dti._data).replace("[ns", f"[{unit}")
  130. # TODO: tests with td64
  131. def test_compare_mismatched_resolutions(self, comparison_op):
  132. # comparison that numpy gets wrong bc of silent overflows
  133. op = comparison_op
  134. iinfo = np.iinfo(np.int64)
  135. vals = np.array([iinfo.min, iinfo.min + 1, iinfo.max], dtype=np.int64)
  136. # Construct so that arr2[1] < arr[1] < arr[2] < arr2[2]
  137. arr = np.array(vals).view("M8[ns]")
  138. arr2 = arr.view("M8[s]")
  139. left = DatetimeArray._simple_new(arr, dtype=arr.dtype)
  140. right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype)
  141. if comparison_op is operator.eq:
  142. expected = np.array([False, False, False])
  143. elif comparison_op is operator.ne:
  144. expected = np.array([True, True, True])
  145. elif comparison_op in [operator.lt, operator.le]:
  146. expected = np.array([False, False, True])
  147. else:
  148. expected = np.array([False, True, False])
  149. result = op(left, right)
  150. tm.assert_numpy_array_equal(result, expected)
  151. result = op(left[1], right)
  152. tm.assert_numpy_array_equal(result, expected)
  153. if op not in [operator.eq, operator.ne]:
  154. # check that numpy still gets this wrong; if it is fixed we may be
  155. # able to remove compare_mismatched_resolutions
  156. np_res = op(left._ndarray, right._ndarray)
  157. tm.assert_numpy_array_equal(np_res[1:], ~expected[1:])
  158. def test_add_mismatched_reso_doesnt_downcast(self):
  159. # https://github.com/pandas-dev/pandas/pull/48748#issuecomment-1260181008
  160. td = pd.Timedelta(microseconds=1)
  161. dti = pd.date_range("2016-01-01", periods=3) - td
  162. dta = dti._data.as_unit("us")
  163. res = dta + td.as_unit("us")
  164. # even though the result is an even number of days
  165. # (so we _could_ downcast to unit="s"), we do not.
  166. assert res.unit == "us"
  167. @pytest.mark.parametrize(
  168. "scalar",
  169. [
  170. timedelta(hours=2),
  171. pd.Timedelta(hours=2),
  172. np.timedelta64(2, "h"),
  173. np.timedelta64(2 * 3600 * 1000, "ms"),
  174. pd.offsets.Minute(120),
  175. pd.offsets.Hour(2),
  176. ],
  177. )
  178. def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar):
  179. dta, dti = dta_dti
  180. td = pd.Timedelta(scalar)
  181. exp_unit = tm.get_finest_unit(dta.unit, td.unit)
  182. expected = (dti + td)._data.as_unit(exp_unit)
  183. result = dta + scalar
  184. tm.assert_extension_array_equal(result, expected)
  185. result = scalar + dta
  186. tm.assert_extension_array_equal(result, expected)
  187. expected = (dti - td)._data.as_unit(exp_unit)
  188. result = dta - scalar
  189. tm.assert_extension_array_equal(result, expected)
  190. def test_sub_datetimelike_scalar_mismatch(self):
  191. dti = pd.date_range("2016-01-01", periods=3)
  192. dta = dti._data.as_unit("us")
  193. ts = dta[0].as_unit("s")
  194. result = dta - ts
  195. expected = (dti - dti[0])._data.as_unit("us")
  196. assert result.dtype == "m8[us]"
  197. tm.assert_extension_array_equal(result, expected)
  198. def test_sub_datetime64_reso_mismatch(self):
  199. dti = pd.date_range("2016-01-01", periods=3)
  200. left = dti._data.as_unit("s")
  201. right = left.as_unit("ms")
  202. result = left - right
  203. exp_values = np.array([0, 0, 0], dtype="m8[ms]")
  204. expected = TimedeltaArray._simple_new(
  205. exp_values,
  206. dtype=exp_values.dtype,
  207. )
  208. tm.assert_extension_array_equal(result, expected)
  209. result2 = right - left
  210. tm.assert_extension_array_equal(result2, expected)
  211. class TestDatetimeArrayComparisons:
  212. # TODO: merge this into tests/arithmetic/test_datetime64 once it is
  213. # sufficiently robust
  214. def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
  215. # arbitrary tz-naive DatetimeIndex
  216. op = comparison_op
  217. dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
  218. arr = dti._data
  219. assert arr.freq == dti.freq
  220. assert arr.tz == dti.tz
  221. right = dti
  222. expected = np.ones(len(arr), dtype=bool)
  223. if comparison_op.__name__ in ["ne", "gt", "lt"]:
  224. # for these the comparisons should be all-False
  225. expected = ~expected
  226. result = op(arr, arr)
  227. tm.assert_numpy_array_equal(result, expected)
  228. for other in [
  229. right,
  230. np.array(right),
  231. list(right),
  232. tuple(right),
  233. right.astype(object),
  234. ]:
  235. result = op(arr, other)
  236. tm.assert_numpy_array_equal(result, expected)
  237. result = op(other, arr)
  238. tm.assert_numpy_array_equal(result, expected)
  239. class TestDatetimeArray:
  240. def test_astype_ns_to_ms_near_bounds(self):
  241. # GH#55979
  242. ts = pd.Timestamp("1677-09-21 00:12:43.145225")
  243. target = ts.as_unit("ms")
  244. dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]")
  245. assert (dta.view("i8") == ts.as_unit("ns").value).all()
  246. result = dta.astype("M8[ms]")
  247. assert result[0] == target
  248. expected = DatetimeArray._from_sequence([ts], dtype="M8[ms]")
  249. assert (expected.view("i8") == target._value).all()
  250. tm.assert_datetime_array_equal(result, expected)
  251. def test_astype_non_nano_tznaive(self):
  252. dti = pd.date_range("2016-01-01", periods=3)
  253. res = dti.astype("M8[s]")
  254. assert res.dtype == "M8[s]"
  255. dta = dti._data
  256. res = dta.astype("M8[s]")
  257. assert res.dtype == "M8[s]"
  258. assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray
  259. def test_astype_non_nano_tzaware(self):
  260. dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
  261. res = dti.astype("M8[s, US/Pacific]")
  262. assert res.dtype == "M8[s, US/Pacific]"
  263. dta = dti._data
  264. res = dta.astype("M8[s, US/Pacific]")
  265. assert res.dtype == "M8[s, US/Pacific]"
  266. # from non-nano to non-nano, preserving reso
  267. res2 = res.astype("M8[s, UTC]")
  268. assert res2.dtype == "M8[s, UTC]"
  269. assert not tm.shares_memory(res2, res)
  270. res3 = res.astype("M8[s, UTC]", copy=False)
  271. assert res2.dtype == "M8[s, UTC]"
  272. assert tm.shares_memory(res3, res)
  273. def test_astype_to_same(self):
  274. arr = DatetimeArray._from_sequence(
  275. ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
  276. )
  277. result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
  278. assert result is arr
  279. @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
  280. @pytest.mark.parametrize(
  281. "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
  282. )
  283. def test_astype_copies(self, dtype, other):
  284. # https://github.com/pandas-dev/pandas/pull/32490
  285. ser = pd.Series([1, 2], dtype=dtype)
  286. orig = ser.copy()
  287. err = False
  288. if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
  289. # deprecated in favor of tz_localize
  290. err = True
  291. if err:
  292. if dtype == "datetime64[ns]":
  293. msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
  294. else:
  295. msg = "from timezone-aware dtype to timezone-naive dtype"
  296. with pytest.raises(TypeError, match=msg):
  297. ser.astype(other)
  298. else:
  299. t = ser.astype(other)
  300. t[:] = pd.NaT
  301. tm.assert_series_equal(ser, orig)
  302. @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
  303. def test_astype_int(self, dtype):
  304. arr = DatetimeArray._from_sequence(
  305. [pd.Timestamp("2000"), pd.Timestamp("2001")], dtype="M8[ns]"
  306. )
  307. if np.dtype(dtype) != np.int64:
  308. with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
  309. arr.astype(dtype)
  310. return
  311. result = arr.astype(dtype)
  312. expected = arr._ndarray.view("i8")
  313. tm.assert_numpy_array_equal(result, expected)
  314. def test_astype_to_sparse_dt64(self):
  315. # GH#50082
  316. dti = pd.date_range("2016-01-01", periods=4)
  317. dta = dti._data
  318. result = dta.astype("Sparse[datetime64[ns]]")
  319. assert result.dtype == "Sparse[datetime64[ns]]"
  320. assert (result == dta).all()
  321. def test_tz_setter_raises(self):
  322. arr = DatetimeArray._from_sequence(
  323. ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
  324. )
  325. with pytest.raises(AttributeError, match="tz_localize"):
  326. arr.tz = "UTC"
  327. def test_setitem_str_impute_tz(self, tz_naive_fixture):
  328. # Like for getitem, if we are passed a naive-like string, we impute
  329. # our own timezone.
  330. tz = tz_naive_fixture
  331. data = np.array([1, 2, 3], dtype="M8[ns]")
  332. dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz)
  333. arr = DatetimeArray._from_sequence(data, dtype=dtype)
  334. expected = arr.copy()
  335. ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz)
  336. setter = str(ts.tz_localize(None))
  337. # Setting a scalar tznaive string
  338. expected[0] = ts
  339. arr[0] = setter
  340. tm.assert_equal(arr, expected)
  341. # Setting a listlike of tznaive strings
  342. expected[1] = ts
  343. arr[:2] = [setter, setter]
  344. tm.assert_equal(arr, expected)
  345. def test_setitem_different_tz_raises(self):
  346. # pre-2.0 we required exact tz match, in 2.0 we require only
  347. # tzawareness-match
  348. data = np.array([1, 2, 3], dtype="M8[ns]")
  349. arr = DatetimeArray._from_sequence(
  350. data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")
  351. )
  352. with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
  353. arr[0] = pd.Timestamp("2000")
  354. ts = pd.Timestamp("2000", tz="US/Eastern")
  355. arr[0] = ts
  356. assert arr[0] == ts.tz_convert("US/Central")
  357. def test_setitem_clears_freq(self):
  358. a = pd.date_range("2000", periods=2, freq="D", tz="US/Central")._data
  359. a[0] = pd.Timestamp("2000", tz="US/Central")
  360. assert a.freq is None
  361. @pytest.mark.parametrize(
  362. "obj",
  363. [
  364. pd.Timestamp("2021-01-01"),
  365. pd.Timestamp("2021-01-01").to_datetime64(),
  366. pd.Timestamp("2021-01-01").to_pydatetime(),
  367. ],
  368. )
  369. def test_setitem_objects(self, obj):
  370. # make sure we accept datetime64 and datetime in addition to Timestamp
  371. dti = pd.date_range("2000", periods=2, freq="D")
  372. arr = dti._data
  373. arr[0] = obj
  374. assert arr[0] == obj
  375. def test_repeat_preserves_tz(self):
  376. dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
  377. arr = dti._data
  378. repeated = arr.repeat([1, 1])
  379. # preserves tz and values, but not freq
  380. expected = DatetimeArray._from_sequence(arr.asi8, dtype=arr.dtype)
  381. tm.assert_equal(repeated, expected)
  382. def test_value_counts_preserves_tz(self):
  383. dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
  384. arr = dti._data.repeat([4, 3])
  385. result = arr.value_counts()
  386. # Note: not tm.assert_index_equal, since `freq`s do not match
  387. assert result.index.equals(dti)
  388. arr[-2] = pd.NaT
  389. result = arr.value_counts(dropna=False)
  390. expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT], name="count")
  391. tm.assert_series_equal(result, expected)
  392. @pytest.mark.parametrize("method", ["pad", "backfill"])
  393. def test_fillna_preserves_tz(self, method):
  394. dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
  395. arr = DatetimeArray._from_sequence(dti, copy=True)
  396. arr[2] = pd.NaT
  397. fill_val = dti[1] if method == "pad" else dti[3]
  398. expected = DatetimeArray._from_sequence(
  399. [dti[0], dti[1], fill_val, dti[3], dti[4]],
  400. dtype=DatetimeTZDtype(tz="US/Central"),
  401. )
  402. result = arr._pad_or_backfill(method=method)
  403. tm.assert_extension_array_equal(result, expected)
  404. # assert that arr and dti were not modified in-place
  405. assert arr[2] is pd.NaT
  406. assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")
  407. def test_fillna_2d(self):
  408. dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
  409. dta = dti._data.reshape(3, 2).copy()
  410. dta[0, 1] = pd.NaT
  411. dta[1, 0] = pd.NaT
  412. res1 = dta._pad_or_backfill(method="pad")
  413. expected1 = dta.copy()
  414. expected1[1, 0] = dta[0, 0]
  415. tm.assert_extension_array_equal(res1, expected1)
  416. res2 = dta._pad_or_backfill(method="backfill")
  417. expected2 = dta.copy()
  418. expected2 = dta.copy()
  419. expected2[1, 0] = dta[2, 0]
  420. expected2[0, 1] = dta[1, 1]
  421. tm.assert_extension_array_equal(res2, expected2)
  422. # with different ordering for underlying ndarray; behavior should
  423. # be unchanged
  424. dta2 = dta._from_backing_data(dta._ndarray.copy(order="F"))
  425. assert dta2._ndarray.flags["F_CONTIGUOUS"]
  426. assert not dta2._ndarray.flags["C_CONTIGUOUS"]
  427. tm.assert_extension_array_equal(dta, dta2)
  428. res3 = dta2._pad_or_backfill(method="pad")
  429. tm.assert_extension_array_equal(res3, expected1)
  430. res4 = dta2._pad_or_backfill(method="backfill")
  431. tm.assert_extension_array_equal(res4, expected2)
  432. # test the DataFrame method while we're here
  433. df = pd.DataFrame(dta)
  434. res = df.ffill()
  435. expected = pd.DataFrame(expected1)
  436. tm.assert_frame_equal(res, expected)
  437. res = df.bfill()
  438. expected = pd.DataFrame(expected2)
  439. tm.assert_frame_equal(res, expected)
  440. def test_array_interface_tz(self):
  441. tz = "US/Central"
  442. data = pd.date_range("2017", periods=2, tz=tz)._data
  443. result = np.asarray(data)
  444. expected = np.array(
  445. [
  446. pd.Timestamp("2017-01-01T00:00:00", tz=tz),
  447. pd.Timestamp("2017-01-02T00:00:00", tz=tz),
  448. ],
  449. dtype=object,
  450. )
  451. tm.assert_numpy_array_equal(result, expected)
  452. result = np.asarray(data, dtype=object)
  453. tm.assert_numpy_array_equal(result, expected)
  454. result = np.asarray(data, dtype="M8[ns]")
  455. expected = np.array(
  456. ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
  457. )
  458. tm.assert_numpy_array_equal(result, expected)
  459. def test_array_interface(self):
  460. data = pd.date_range("2017", periods=2)._data
  461. expected = np.array(
  462. ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
  463. )
  464. result = np.asarray(data)
  465. tm.assert_numpy_array_equal(result, expected)
  466. result = np.asarray(data, dtype=object)
  467. expected = np.array(
  468. [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
  469. dtype=object,
  470. )
  471. tm.assert_numpy_array_equal(result, expected)
  472. @pytest.mark.parametrize("index", [True, False])
  473. def test_searchsorted_different_tz(self, index):
  474. data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
  475. arr = pd.DatetimeIndex(data, freq="D")._data.tz_localize("Asia/Tokyo")
  476. if index:
  477. arr = pd.Index(arr)
  478. expected = arr.searchsorted(arr[2])
  479. result = arr.searchsorted(arr[2].tz_convert("UTC"))
  480. assert result == expected
  481. expected = arr.searchsorted(arr[2:6])
  482. result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
  483. tm.assert_equal(result, expected)
  484. @pytest.mark.parametrize("index", [True, False])
  485. def test_searchsorted_tzawareness_compat(self, index):
  486. data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
  487. arr = pd.DatetimeIndex(data, freq="D")._data
  488. if index:
  489. arr = pd.Index(arr)
  490. mismatch = arr.tz_localize("Asia/Tokyo")
  491. msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
  492. with pytest.raises(TypeError, match=msg):
  493. arr.searchsorted(mismatch[0])
  494. with pytest.raises(TypeError, match=msg):
  495. arr.searchsorted(mismatch)
  496. with pytest.raises(TypeError, match=msg):
  497. mismatch.searchsorted(arr[0])
  498. with pytest.raises(TypeError, match=msg):
  499. mismatch.searchsorted(arr)
  500. @pytest.mark.parametrize(
  501. "other",
  502. [
  503. 1,
  504. np.int64(1),
  505. 1.0,
  506. np.timedelta64("NaT"),
  507. pd.Timedelta(days=2),
  508. "invalid",
  509. np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
  510. np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9,
  511. pd.Timestamp("2021-01-01").to_period("D"),
  512. ],
  513. )
  514. @pytest.mark.parametrize("index", [True, False])
  515. def test_searchsorted_invalid_types(self, other, index):
  516. data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
  517. arr = pd.DatetimeIndex(data, freq="D")._data
  518. if index:
  519. arr = pd.Index(arr)
  520. msg = "|".join(
  521. [
  522. "searchsorted requires compatible dtype or scalar",
  523. "value should be a 'Timestamp', 'NaT', or array of those. Got",
  524. ]
  525. )
  526. with pytest.raises(TypeError, match=msg):
  527. arr.searchsorted(other)
  528. def test_shift_fill_value(self):
  529. dti = pd.date_range("2016-01-01", periods=3)
  530. dta = dti._data
  531. expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1))
  532. fv = dta[-1]
  533. for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
  534. result = dta.shift(1, fill_value=fill_value)
  535. tm.assert_datetime_array_equal(result, expected)
  536. dta = dta.tz_localize("UTC")
  537. expected = expected.tz_localize("UTC")
  538. fv = dta[-1]
  539. for fill_value in [fv, fv.to_pydatetime()]:
  540. result = dta.shift(1, fill_value=fill_value)
  541. tm.assert_datetime_array_equal(result, expected)
  542. def test_shift_value_tzawareness_mismatch(self):
  543. dti = pd.date_range("2016-01-01", periods=3)
  544. dta = dti._data
  545. fv = dta[-1].tz_localize("UTC")
  546. for invalid in [fv, fv.to_pydatetime()]:
  547. with pytest.raises(TypeError, match="Cannot compare"):
  548. dta.shift(1, fill_value=invalid)
  549. dta = dta.tz_localize("UTC")
  550. fv = dta[-1].tz_localize(None)
  551. for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
  552. with pytest.raises(TypeError, match="Cannot compare"):
  553. dta.shift(1, fill_value=invalid)
  554. def test_shift_requires_tzmatch(self):
  555. # pre-2.0 we required exact tz match, in 2.0 we require just
  556. # matching tzawareness
  557. dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
  558. dta = dti._data
  559. fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")
  560. result = dta.shift(1, fill_value=fill_value)
  561. expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
  562. tm.assert_equal(result, expected)
  563. def test_tz_localize_t2d(self):
  564. dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
  565. dta = dti._data.reshape(3, 4)
  566. result = dta.tz_localize(None)
  567. expected = dta.ravel().tz_localize(None).reshape(dta.shape)
  568. tm.assert_datetime_array_equal(result, expected)
  569. roundtrip = expected.tz_localize("US/Pacific")
  570. tm.assert_datetime_array_equal(roundtrip, dta)
  571. easts = ["US/Eastern", "dateutil/US/Eastern"]
  572. if ZoneInfo is not None:
  573. try:
  574. tz = ZoneInfo("US/Eastern")
  575. except KeyError:
  576. # no tzdata
  577. pass
  578. else:
  579. # Argument 1 to "append" of "list" has incompatible type "ZoneInfo";
  580. # expected "str"
  581. easts.append(tz) # type: ignore[arg-type]
  582. @pytest.mark.parametrize("tz", easts)
  583. def test_iter_zoneinfo_fold(self, tz):
  584. # GH#49684
  585. utc_vals = np.array(
  586. [1320552000, 1320555600, 1320559200, 1320562800], dtype=np.int64
  587. )
  588. utc_vals *= 1_000_000_000
  589. dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz)
  590. left = dta[2]
  591. right = list(dta)[2]
  592. assert str(left) == str(right)
  593. # previously there was a bug where with non-pytz right would be
  594. # Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern')
  595. # while left would be
  596. # Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern')
  597. # The .value's would match (so they would compare as equal),
  598. # but the folds would not
  599. assert left.utcoffset() == right.utcoffset()
  600. # The same bug in ints_to_pydatetime affected .astype, so we test
  601. # that here.
  602. right2 = dta.astype(object)[2]
  603. assert str(left) == str(right2)
  604. assert left.utcoffset() == right2.utcoffset()
  605. @pytest.mark.parametrize(
  606. "freq, freq_depr",
  607. [
  608. ("2ME", "2M"),
  609. ("2SME", "2SM"),
  610. ("2SME", "2sm"),
  611. ("2QE", "2Q"),
  612. ("2QE-SEP", "2Q-SEP"),
  613. ("1YE", "1Y"),
  614. ("2YE-MAR", "2Y-MAR"),
  615. ("1YE", "1A"),
  616. ("2YE-MAR", "2A-MAR"),
  617. ("2ME", "2m"),
  618. ("2QE-SEP", "2q-sep"),
  619. ("2YE-MAR", "2a-mar"),
  620. ("2YE", "2y"),
  621. ],
  622. )
  623. def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
  624. # GH#9586, GH#54275
  625. depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
  626. f"in a future version, please use '{freq[1:]}' instead."
  627. expected = pd.date_range("1/1/2000", periods=4, freq=freq)
  628. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  629. result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
  630. tm.assert_index_equal(result, expected)
  631. @pytest.mark.parametrize("freq_depr", ["2H", "2CBH", "2MIN", "2S", "2mS", "2Us"])
  632. def test_date_range_uppercase_frequency_deprecated(self, freq_depr):
  633. # GH#9586, GH#54939
  634. depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
  635. f"future version. Please use '{freq_depr.lower()[1:]}' instead."
  636. expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.lower())
  637. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  638. result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
  639. tm.assert_index_equal(result, expected)
  640. @pytest.mark.parametrize(
  641. "freq_depr",
  642. [
  643. "2ye-mar",
  644. "2ys",
  645. "2qe",
  646. "2qs-feb",
  647. "2bqs",
  648. "2sms",
  649. "2bms",
  650. "2cbme",
  651. "2me",
  652. "2w",
  653. ],
  654. )
  655. def test_date_range_lowercase_frequency_deprecated(self, freq_depr):
  656. # GH#9586, GH#54939
  657. depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
  658. f"future version, please use '{freq_depr.upper()[1:]}' instead."
  659. expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.upper())
  660. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  661. result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
  662. tm.assert_index_equal(result, expected)
  663. def test_factorize_sort_without_freq():
  664. dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]")
  665. msg = r"call pd.factorize\(obj, sort=True\) instead"
  666. with pytest.raises(NotImplementedError, match=msg):
  667. dta.factorize(sort=True)
  668. # Do TimedeltaArray while we're here
  669. tda = dta - dta[0]
  670. with pytest.raises(NotImplementedError, match=msg):
  671. tda.factorize(sort=True)