test_iteration.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. import datetime
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import (
  5. IS64,
  6. is_platform_windows,
  7. )
  8. from pandas import (
  9. Categorical,
  10. DataFrame,
  11. Series,
  12. date_range,
  13. )
  14. import pandas._testing as tm
  15. class TestIteration:
  16. def test_keys(self, float_frame):
  17. assert float_frame.keys() is float_frame.columns
  18. def test_iteritems(self):
  19. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
  20. for k, v in df.items():
  21. assert isinstance(v, DataFrame._constructor_sliced)
  22. def test_items(self):
  23. # GH#17213, GH#13918
  24. cols = ["a", "b", "c"]
  25. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
  26. for c, (k, v) in zip(cols, df.items()):
  27. assert c == k
  28. assert isinstance(v, Series)
  29. assert (df[k] == v).all()
  30. def test_items_names(self, float_string_frame):
  31. for k, v in float_string_frame.items():
  32. assert v.name == k
  33. def test_iter(self, float_frame):
  34. assert list(float_frame) == list(float_frame.columns)
  35. def test_iterrows(self, float_frame, float_string_frame):
  36. for k, v in float_frame.iterrows():
  37. exp = float_frame.loc[k]
  38. tm.assert_series_equal(v, exp)
  39. for k, v in float_string_frame.iterrows():
  40. exp = float_string_frame.loc[k]
  41. tm.assert_series_equal(v, exp)
  42. def test_iterrows_iso8601(self):
  43. # GH#19671
  44. s = DataFrame(
  45. {
  46. "non_iso8601": ["M1701", "M1802", "M1903", "M2004"],
  47. "iso8601": date_range("2000-01-01", periods=4, freq="ME"),
  48. }
  49. )
  50. for k, v in s.iterrows():
  51. exp = s.loc[k]
  52. tm.assert_series_equal(v, exp)
  53. def test_iterrows_corner(self):
  54. # GH#12222
  55. df = DataFrame(
  56. {
  57. "a": [datetime.datetime(2015, 1, 1)],
  58. "b": [None],
  59. "c": [None],
  60. "d": [""],
  61. "e": [[]],
  62. "f": [set()],
  63. "g": [{}],
  64. }
  65. )
  66. expected = Series(
  67. [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}],
  68. index=list("abcdefg"),
  69. name=0,
  70. dtype="object",
  71. )
  72. _, result = next(df.iterrows())
  73. tm.assert_series_equal(result, expected)
  74. def test_itertuples(self, float_frame):
  75. for i, tup in enumerate(float_frame.itertuples()):
  76. ser = DataFrame._constructor_sliced(tup[1:])
  77. ser.name = tup[0]
  78. expected = float_frame.iloc[i, :].reset_index(drop=True)
  79. tm.assert_series_equal(ser, expected)
  80. def test_itertuples_index_false(self):
  81. df = DataFrame(
  82. {"floats": np.random.default_rng(2).standard_normal(5), "ints": range(5)},
  83. columns=["floats", "ints"],
  84. )
  85. for tup in df.itertuples(index=False):
  86. assert isinstance(tup[1], int)
  87. def test_itertuples_duplicate_cols(self):
  88. df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
  89. dfaa = df[["a", "a"]]
  90. assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]
  91. # repr with int on 32-bit/windows
  92. if not (is_platform_windows() or not IS64):
  93. assert (
  94. repr(list(df.itertuples(name=None)))
  95. == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]"
  96. )
  97. def test_itertuples_tuple_name(self):
  98. df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
  99. tup = next(df.itertuples(name="TestName"))
  100. assert tup._fields == ("Index", "a", "b")
  101. assert (tup.Index, tup.a, tup.b) == tup
  102. assert type(tup).__name__ == "TestName"
  103. def test_itertuples_disallowed_col_labels(self):
  104. df = DataFrame(data={"def": [1, 2, 3], "return": [4, 5, 6]})
  105. tup2 = next(df.itertuples(name="TestName"))
  106. assert tup2 == (0, 1, 4)
  107. assert tup2._fields == ("Index", "_1", "_2")
  108. @pytest.mark.parametrize("limit", [254, 255, 1024])
  109. @pytest.mark.parametrize("index", [True, False])
  110. def test_itertuples_py2_3_field_limit_namedtuple(self, limit, index):
  111. # GH#28282
  112. df = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(limit)}])
  113. result = next(df.itertuples(index=index))
  114. assert isinstance(result, tuple)
  115. assert hasattr(result, "_fields")
  116. def test_sequence_like_with_categorical(self):
  117. # GH#7839
  118. # make sure can iterate
  119. df = DataFrame(
  120. {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
  121. )
  122. df["grade"] = Categorical(df["raw_grade"])
  123. # basic sequencing testing
  124. result = list(df.grade.values)
  125. expected = np.array(df.grade.values).tolist()
  126. tm.assert_almost_equal(result, expected)
  127. # iteration
  128. for t in df.itertuples(index=False):
  129. str(t)
  130. for row, s in df.iterrows():
  131. str(s)
  132. for c, col in df.items():
  133. str(col)