# test_reduction.py (4.0 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. Series,
  7. array,
  8. )
  9. import pandas._testing as tm
  10. @pytest.mark.parametrize(
  11. "op, expected",
  12. [
  13. ["sum", np.int64(3)],
  14. ["prod", np.int64(2)],
  15. ["min", np.int64(1)],
  16. ["max", np.int64(2)],
  17. ["mean", np.float64(1.5)],
  18. ["median", np.float64(1.5)],
  19. ["var", np.float64(0.5)],
  20. ["std", np.float64(0.5**0.5)],
  21. ["skew", pd.NA],
  22. ["kurt", pd.NA],
  23. ["any", True],
  24. ["all", True],
  25. ],
  26. )
  27. def test_series_reductions(op, expected):
  28. ser = Series([1, 2], dtype="Int64")
  29. result = getattr(ser, op)()
  30. tm.assert_equal(result, expected)
  31. @pytest.mark.parametrize(
  32. "op, expected",
  33. [
  34. ["sum", Series([3], index=["a"], dtype="Int64")],
  35. ["prod", Series([2], index=["a"], dtype="Int64")],
  36. ["min", Series([1], index=["a"], dtype="Int64")],
  37. ["max", Series([2], index=["a"], dtype="Int64")],
  38. ["mean", Series([1.5], index=["a"], dtype="Float64")],
  39. ["median", Series([1.5], index=["a"], dtype="Float64")],
  40. ["var", Series([0.5], index=["a"], dtype="Float64")],
  41. ["std", Series([0.5**0.5], index=["a"], dtype="Float64")],
  42. ["skew", Series([pd.NA], index=["a"], dtype="Float64")],
  43. ["kurt", Series([pd.NA], index=["a"], dtype="Float64")],
  44. ["any", Series([True], index=["a"], dtype="boolean")],
  45. ["all", Series([True], index=["a"], dtype="boolean")],
  46. ],
  47. )
  48. def test_dataframe_reductions(op, expected):
  49. df = DataFrame({"a": array([1, 2], dtype="Int64")})
  50. result = getattr(df, op)()
  51. tm.assert_series_equal(result, expected)
  52. @pytest.mark.parametrize(
  53. "op, expected",
  54. [
  55. ["sum", array([1, 3], dtype="Int64")],
  56. ["prod", array([1, 3], dtype="Int64")],
  57. ["min", array([1, 3], dtype="Int64")],
  58. ["max", array([1, 3], dtype="Int64")],
  59. ["mean", array([1, 3], dtype="Float64")],
  60. ["median", array([1, 3], dtype="Float64")],
  61. ["var", array([pd.NA], dtype="Float64")],
  62. ["std", array([pd.NA], dtype="Float64")],
  63. ["skew", array([pd.NA], dtype="Float64")],
  64. ["any", array([True, True], dtype="boolean")],
  65. ["all", array([True, True], dtype="boolean")],
  66. ],
  67. )
  68. def test_groupby_reductions(op, expected):
  69. df = DataFrame(
  70. {
  71. "A": ["a", "b", "b"],
  72. "B": array([1, None, 3], dtype="Int64"),
  73. }
  74. )
  75. result = getattr(df.groupby("A"), op)()
  76. expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"])
  77. tm.assert_frame_equal(result, expected)
  78. @pytest.mark.parametrize(
  79. "op, expected",
  80. [
  81. ["sum", Series([4, 4], index=["B", "C"], dtype="Float64")],
  82. ["prod", Series([3, 3], index=["B", "C"], dtype="Float64")],
  83. ["min", Series([1, 1], index=["B", "C"], dtype="Float64")],
  84. ["max", Series([3, 3], index=["B", "C"], dtype="Float64")],
  85. ["mean", Series([2, 2], index=["B", "C"], dtype="Float64")],
  86. ["median", Series([2, 2], index=["B", "C"], dtype="Float64")],
  87. ["var", Series([2, 2], index=["B", "C"], dtype="Float64")],
  88. ["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")],
  89. ["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
  90. ["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
  91. ["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
  92. ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
  93. ],
  94. )
  95. def test_mixed_reductions(op, expected):
  96. df = DataFrame(
  97. {
  98. "A": ["a", "b", "b"],
  99. "B": [1, None, 3],
  100. "C": array([1, None, 3], dtype="Int64"),
  101. }
  102. )
  103. # series
  104. result = getattr(df.C, op)()
  105. tm.assert_equal(result, expected["C"])
  106. # frame
  107. if op in ["any", "all"]:
  108. result = getattr(df, op)()
  109. else:
  110. result = getattr(df, op)(numeric_only=True)
  111. tm.assert_series_equal(result, expected)