test_equals.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. import numpy as np
  2. from pandas import (
  3. DataFrame,
  4. date_range,
  5. )
  6. import pandas._testing as tm
  7. class TestEquals:
  8. def test_dataframe_not_equal(self):
  9. # see GH#28839
  10. df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]})
  11. df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
  12. assert df1.equals(df2) is False
  13. def test_equals_different_blocks(self, using_array_manager, using_infer_string):
  14. # GH#9330
  15. df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
  16. df1 = df0.reset_index()[["A", "B", "C"]]
  17. if not using_array_manager and not using_infer_string:
  18. # this assert verifies that the above operations have
  19. # induced a block rearrangement
  20. assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
  21. # do the real tests
  22. tm.assert_frame_equal(df0, df1)
  23. assert df0.equals(df1)
  24. assert df1.equals(df0)
  25. def test_equals(self):
  26. # Add object dtype column with nans
  27. index = np.random.default_rng(2).random(10)
  28. df1 = DataFrame(
  29. np.random.default_rng(2).random(10), index=index, columns=["floats"]
  30. )
  31. df1["text"] = "the sky is so blue. we could use more chocolate.".split()
  32. df1["start"] = date_range("2000-1-1", periods=10, freq="min")
  33. df1["end"] = date_range("2000-1-1", periods=10, freq="D")
  34. df1["diff"] = df1["end"] - df1["start"]
  35. # Explicitly cast to object, to avoid implicit cast when setting np.nan
  36. df1["bool"] = (np.arange(10) % 3 == 0).astype(object)
  37. df1.loc[::2] = np.nan
  38. df2 = df1.copy()
  39. assert df1["text"].equals(df2["text"])
  40. assert df1["start"].equals(df2["start"])
  41. assert df1["end"].equals(df2["end"])
  42. assert df1["diff"].equals(df2["diff"])
  43. assert df1["bool"].equals(df2["bool"])
  44. assert df1.equals(df2)
  45. assert not df1.equals(object)
  46. # different dtype
  47. different = df1.copy()
  48. different["floats"] = different["floats"].astype("float32")
  49. assert not df1.equals(different)
  50. # different index
  51. different_index = -index
  52. different = df2.set_index(different_index)
  53. assert not df1.equals(different)
  54. # different columns
  55. different = df2.copy()
  56. different.columns = df2.columns[::-1]
  57. assert not df1.equals(different)
  58. # DatetimeIndex
  59. index = date_range("2000-1-1", periods=10, freq="min")
  60. df1 = df1.set_index(index)
  61. df2 = df1.copy()
  62. assert df1.equals(df2)
  63. # MultiIndex
  64. df3 = df1.set_index(["text"], append=True)
  65. df2 = df1.set_index(["text"], append=True)
  66. assert df3.equals(df2)
  67. df2 = df1.set_index(["floats"], append=True)
  68. assert not df3.equals(df2)
  69. # NaN in index
  70. df3 = df1.set_index(["floats"], append=True)
  71. df2 = df1.set_index(["floats"], append=True)
  72. assert df3.equals(df2)