diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5160d2ea8b8fe..8d3e9fcdf04ce 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -743,6 +743,7 @@ Indexing - Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) +- Bug in :meth:`Index.equals` when comparing a string dtype :class:`Index` with an :class:`Index` of a different dtype, which affected comparisons between :class:`Series` (:issue:`61099`) - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4e1ea07907cdb..aed8287926810 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool: # quickly return if the lengths are different return False - if ( - isinstance(self.dtype, StringDtype) - and self.dtype.na_value is np.nan - and other.dtype != self.dtype - ): + if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype: # TODO(infer_string) can we avoid this special case? 
# special case for object behavior return other.equals(self.astype(object)) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bc69ec388bf0c..e6a86dad8abf6 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from pandas.compat import HAS_PYARROW +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -2183,19 +2183,28 @@ def test_enum_column_equality(): tm.assert_series_equal(result, expected) -def test_mixed_col_index_dtype(using_infer_string): +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + "string[pyarrow]", + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + "str", + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_mixed_col_index_dtype(dtype): # GH 47382 df1 = DataFrame(columns=list("abc"), data=1.0, index=[0]) df2 = DataFrame(columns=list("abc"), data=0.0, index=[0]) - df1.columns = df2.columns.astype("string") + df1.columns = df2.columns.astype(dtype) result = df1 + df2 expected = DataFrame(columns=list("abc"), data=1.0, index=[0]) - if using_infer_string: - # df2.columns.dtype will be "str" instead of object, - # so the aligned result will be "string", not object - if HAS_PYARROW: - dtype = "string[pyarrow]" - else: - dtype = "string" - expected.columns = expected.columns.astype(dtype) + + expected.columns = expected.columns.astype(dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5b75bd9afd6df..fa3510996b3b7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -40,6 +40,7 @@ ensure_index, ensure_index_from_sequences, ) +from pandas.testing import assert_series_equal class TestIndex: @@ -1717,3 +1718,51 @@ def test_is_monotonic_pyarrow_list_type(): idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) assert 
not idx.is_monotonic_increasing assert not idx.is_monotonic_decreasing + + +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + "string[pyarrow]", + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + "str", + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_index_equals_different_string_dtype(dtype): + # GH 61099 + idx_obj = Index(["a", "b", "c"]) + idx_str = Index(["a", "b", "c"], dtype=dtype) + + assert idx_obj.equals(idx_str) + assert idx_str.equals(idx_obj) + + +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + "string[pyarrow]", + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + "str", + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_index_comparison_different_string_dtype(dtype): + # GH 61099 + idx = Index(["a", "b", "c"]) + s_obj = Series([1, 2, 3], index=idx) + s_str = Series([4, 5, 6], index=idx.astype(dtype)) + + expected = Series([True, True, True], index=["a", "b", "c"]) + result = s_obj < s_str + assert_series_equal(result, expected)