test_to_timedelta.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. from datetime import (
  2. time,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import IS64
  8. from pandas.errors import OutOfBoundsTimedelta
  9. import pandas as pd
  10. from pandas import (
  11. Series,
  12. TimedeltaIndex,
  13. isna,
  14. to_timedelta,
  15. )
  16. import pandas._testing as tm
  17. from pandas.core.arrays import TimedeltaArray
  18. class TestTimedeltas:
  19. def test_to_timedelta_dt64_raises(self):
  20. # Passing datetime64-dtype data to TimedeltaIndex is no longer
  21. # supported GH#29794
  22. msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
  23. ser = Series([pd.NaT])
  24. with pytest.raises(TypeError, match=msg):
  25. to_timedelta(ser)
  26. with pytest.raises(TypeError, match=msg):
  27. ser.to_frame().apply(to_timedelta)
  28. @pytest.mark.parametrize("readonly", [True, False])
  29. def test_to_timedelta_readonly(self, readonly):
  30. # GH#34857
  31. arr = np.array([], dtype=object)
  32. if readonly:
  33. arr.setflags(write=False)
  34. result = to_timedelta(arr)
  35. expected = to_timedelta([])
  36. tm.assert_index_equal(result, expected)
  37. def test_to_timedelta_null(self):
  38. result = to_timedelta(["", ""])
  39. assert isna(result).all()
  40. def test_to_timedelta_same_np_timedelta64(self):
  41. # pass thru
  42. result = to_timedelta(np.array([np.timedelta64(1, "s")]))
  43. expected = pd.Index(np.array([np.timedelta64(1, "s")]))
  44. tm.assert_index_equal(result, expected)
  45. def test_to_timedelta_series(self):
  46. # Series
  47. expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
  48. result = to_timedelta(Series(["1d", "1days 00:00:01"]))
  49. tm.assert_series_equal(result, expected)
  50. def test_to_timedelta_units(self):
  51. # with units
  52. result = TimedeltaIndex(
  53. [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")]
  54. )
  55. expected = to_timedelta([0, 10], unit="s")
  56. tm.assert_index_equal(result, expected)
  57. @pytest.mark.parametrize(
  58. "dtype, unit",
  59. [
  60. ["int64", "s"],
  61. ["int64", "m"],
  62. ["int64", "h"],
  63. ["timedelta64[s]", "s"],
  64. ["timedelta64[D]", "D"],
  65. ],
  66. )
  67. def test_to_timedelta_units_dtypes(self, dtype, unit):
  68. # arrays of various dtypes
  69. arr = np.array([1] * 5, dtype=dtype)
  70. result = to_timedelta(arr, unit=unit)
  71. exp_dtype = "m8[ns]" if dtype == "int64" else "m8[s]"
  72. expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5, dtype=exp_dtype)
  73. tm.assert_index_equal(result, expected)
  74. def test_to_timedelta_oob_non_nano(self):
  75. arr = np.array([pd.NaT._value + 1], dtype="timedelta64[m]")
  76. msg = (
  77. "Cannot convert -9223372036854775807 minutes to "
  78. r"timedelta64\[s\] without overflow"
  79. )
  80. with pytest.raises(OutOfBoundsTimedelta, match=msg):
  81. to_timedelta(arr)
  82. with pytest.raises(OutOfBoundsTimedelta, match=msg):
  83. TimedeltaIndex(arr)
  84. with pytest.raises(OutOfBoundsTimedelta, match=msg):
  85. TimedeltaArray._from_sequence(arr, dtype="m8[s]")
  86. @pytest.mark.parametrize(
  87. "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]
  88. )
  89. @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"])
  90. @pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
  91. def test_to_timedelta_dataframe(self, arg, errors):
  92. # GH 11776
  93. with pytest.raises(TypeError, match="1-d array"):
  94. to_timedelta(arg, errors=errors)
  95. def test_to_timedelta_invalid_errors(self):
  96. # bad value for errors parameter
  97. msg = "errors must be one of"
  98. with pytest.raises(ValueError, match=msg):
  99. to_timedelta(["foo"], errors="never")
  100. @pytest.mark.parametrize("arg", [[1, 2], 1])
  101. def test_to_timedelta_invalid_unit(self, arg):
  102. # these will error
  103. msg = "invalid unit abbreviation: foo"
  104. with pytest.raises(ValueError, match=msg):
  105. to_timedelta(arg, unit="foo")
  106. def test_to_timedelta_time(self):
  107. # time not supported ATM
  108. msg = (
  109. "Value must be Timedelta, string, integer, float, timedelta or convertible"
  110. )
  111. with pytest.raises(ValueError, match=msg):
  112. to_timedelta(time(second=1))
  113. assert to_timedelta(time(second=1), errors="coerce") is pd.NaT
  114. def test_to_timedelta_bad_value(self):
  115. msg = "Could not convert 'foo' to NumPy timedelta"
  116. with pytest.raises(ValueError, match=msg):
  117. to_timedelta(["foo", "bar"])
  118. def test_to_timedelta_bad_value_coerce(self):
  119. tm.assert_index_equal(
  120. TimedeltaIndex([pd.NaT, pd.NaT]),
  121. to_timedelta(["foo", "bar"], errors="coerce"),
  122. )
  123. tm.assert_index_equal(
  124. TimedeltaIndex(["1 day", pd.NaT, "1 min"]),
  125. to_timedelta(["1 day", "bar", "1 min"], errors="coerce"),
  126. )
  127. def test_to_timedelta_invalid_errors_ignore(self):
  128. # gh-13613: these should not error because errors='ignore'
  129. msg = "errors='ignore' is deprecated"
  130. invalid_data = "apple"
  131. with tm.assert_produces_warning(FutureWarning, match=msg):
  132. result = to_timedelta(invalid_data, errors="ignore")
  133. assert invalid_data == result
  134. invalid_data = ["apple", "1 days"]
  135. expected = np.array(invalid_data, dtype=object)
  136. with tm.assert_produces_warning(FutureWarning, match=msg):
  137. result = to_timedelta(invalid_data, errors="ignore")
  138. tm.assert_numpy_array_equal(expected, result)
  139. invalid_data = pd.Index(["apple", "1 days"])
  140. with tm.assert_produces_warning(FutureWarning, match=msg):
  141. result = to_timedelta(invalid_data, errors="ignore")
  142. tm.assert_index_equal(invalid_data, result)
  143. invalid_data = Series(["apple", "1 days"])
  144. with tm.assert_produces_warning(FutureWarning, match=msg):
  145. result = to_timedelta(invalid_data, errors="ignore")
  146. tm.assert_series_equal(invalid_data, result)
  147. @pytest.mark.parametrize(
  148. "val, errors",
  149. [
  150. ("1M", True),
  151. ("1 M", True),
  152. ("1Y", True),
  153. ("1 Y", True),
  154. ("1y", True),
  155. ("1 y", True),
  156. ("1m", False),
  157. ("1 m", False),
  158. ("1 day", False),
  159. ("2day", False),
  160. ],
  161. )
  162. def test_unambiguous_timedelta_values(self, val, errors):
  163. # GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y'
  164. # in pd.to_timedelta
  165. msg = "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta"
  166. if errors:
  167. with pytest.raises(ValueError, match=msg):
  168. to_timedelta(val)
  169. else:
  170. # check it doesn't raise
  171. to_timedelta(val)
  172. def test_to_timedelta_via_apply(self):
  173. # GH 5458
  174. expected = Series([np.timedelta64(1, "s")])
  175. result = Series(["00:00:01"]).apply(to_timedelta)
  176. tm.assert_series_equal(result, expected)
  177. result = Series([to_timedelta("00:00:01")])
  178. tm.assert_series_equal(result, expected)
  179. def test_to_timedelta_inference_without_warning(self):
  180. # GH#41731 inference produces a warning in the Series constructor,
  181. # but _not_ in to_timedelta
  182. vals = ["00:00:01", pd.NaT]
  183. with tm.assert_produces_warning(None):
  184. result = to_timedelta(vals)
  185. expected = TimedeltaIndex([pd.Timedelta(seconds=1), pd.NaT])
  186. tm.assert_index_equal(result, expected)
  187. def test_to_timedelta_on_missing_values(self):
  188. # GH5438
  189. timedelta_NaT = np.timedelta64("NaT")
  190. actual = to_timedelta(Series(["00:00:01", np.nan]))
  191. expected = Series(
  192. [np.timedelta64(1000000000, "ns"), timedelta_NaT],
  193. dtype=f"{tm.ENDIAN}m8[ns]",
  194. )
  195. tm.assert_series_equal(actual, expected)
  196. ser = Series(["00:00:01", pd.NaT], dtype="m8[ns]")
  197. actual = to_timedelta(ser)
  198. tm.assert_series_equal(actual, expected)
  199. @pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA])
  200. def test_to_timedelta_on_missing_values_scalar(self, val):
  201. actual = to_timedelta(val)
  202. assert actual._value == np.timedelta64("NaT").astype("int64")
  203. @pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA])
  204. def test_to_timedelta_on_missing_values_list(self, val):
  205. actual = to_timedelta([val])
  206. assert actual[0]._value == np.timedelta64("NaT").astype("int64")
  207. @pytest.mark.xfail(not IS64, reason="Floating point error")
  208. def test_to_timedelta_float(self):
  209. # https://github.com/pandas-dev/pandas/issues/25077
  210. arr = np.arange(0, 1, 1e-6)[-10:]
  211. result = to_timedelta(arr, unit="s")
  212. expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64")
  213. tm.assert_numpy_array_equal(result.asi8, expected_asi8)
  214. def test_to_timedelta_coerce_strings_unit(self):
  215. arr = np.array([1, 2, "error"], dtype=object)
  216. result = to_timedelta(arr, unit="ns", errors="coerce")
  217. expected = to_timedelta([1, 2, pd.NaT], unit="ns")
  218. tm.assert_index_equal(result, expected)
  219. def test_to_timedelta_ignore_strings_unit(self):
  220. arr = np.array([1, 2, "error"], dtype=object)
  221. msg = "errors='ignore' is deprecated"
  222. with tm.assert_produces_warning(FutureWarning, match=msg):
  223. result = to_timedelta(arr, unit="ns", errors="ignore")
  224. tm.assert_numpy_array_equal(result, arr)
  225. @pytest.mark.parametrize(
  226. "expected_val, result_val", [[timedelta(days=2), 2], [None, None]]
  227. )
  228. def test_to_timedelta_nullable_int64_dtype(self, expected_val, result_val):
  229. # GH 35574
  230. expected = Series([timedelta(days=1), expected_val])
  231. result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days")
  232. tm.assert_series_equal(result, expected)
  233. @pytest.mark.parametrize(
  234. ("input", "expected"),
  235. [
  236. ("8:53:08.71800000001", "8:53:08.718"),
  237. ("8:53:08.718001", "8:53:08.718001"),
  238. ("8:53:08.7180000001", "8:53:08.7180000001"),
  239. ("-8:53:08.71800000001", "-8:53:08.718"),
  240. ("8:53:08.7180000089", "8:53:08.718000008"),
  241. ],
  242. )
  243. @pytest.mark.parametrize("func", [pd.Timedelta, to_timedelta])
  244. def test_to_timedelta_precision_over_nanos(self, input, expected, func):
  245. # GH: 36738
  246. expected = pd.Timedelta(expected)
  247. result = func(input)
  248. assert result == expected
  249. def test_to_timedelta_zerodim(self, fixed_now_ts):
  250. # ndarray.item() incorrectly returns int for dt64[ns] and td64[ns]
  251. dt64 = fixed_now_ts.to_datetime64()
  252. arg = np.array(dt64)
  253. msg = (
  254. "Value must be Timedelta, string, integer, float, timedelta "
  255. "or convertible, not datetime64"
  256. )
  257. with pytest.raises(ValueError, match=msg):
  258. to_timedelta(arg)
  259. arg2 = arg.view("m8[ns]")
  260. result = to_timedelta(arg2)
  261. assert isinstance(result, pd.Timedelta)
  262. assert result._value == dt64.view("i8")
  263. def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype):
  264. # GH#48796
  265. ser = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
  266. result = to_timedelta(ser)
  267. expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT])
  268. tm.assert_series_equal(result, expected)
  269. def test_to_timedelta_fraction(self):
  270. result = to_timedelta(1.0 / 3, unit="h")
  271. expected = pd.Timedelta("0 days 00:19:59.999999998")
  272. assert result == expected
  273. def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
  274. # GH 52425
  275. pytest.importorskip("pyarrow")
  276. ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]")
  277. result = to_timedelta(ser)
  278. expected = Series([1, 2], dtype="timedelta64[ns]")
  279. tm.assert_series_equal(result, expected)
  280. @pytest.mark.parametrize("unit", ["ns", "ms"])
  281. def test_from_timedelta_arrow_dtype(unit):
  282. # GH 54298
  283. pytest.importorskip("pyarrow")
  284. expected = Series([timedelta(1)], dtype=f"duration[{unit}][pyarrow]")
  285. result = to_timedelta(expected)
  286. tm.assert_series_equal(result, expected)