diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 928e44e51b3cf..a3d8456b516b1 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -376,10 +376,15 @@ def is_named_tuple(obj: object) -> bool: return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields") -def is_hashable(obj: object) -> TypeGuard[Hashable]: +def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hashable]: """ Return True if hash(obj) will succeed, False otherwise. + If `allow_slice` is False, objects that are slices or tuples containing slices + will always return False, even if hash(obj) would succeed. + If `allow_slice` is True or None, slices and tuples containing slices are treated as + hashable if hash(obj) does not raise TypeError. + Some types will pass a test against collections.abc.Hashable but fail when they are actually hashed with hash(). @@ -390,13 +395,17 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]: ---------- obj : object The object to check for hashability. Any Python object can be passed here. + allow_slice : bool or None + If True or None, return True if the object is hashable (including slices). + If False, return True if the object is hashable and not a slice. Returns ------- bool True if object can be hashed (i.e., does not raise TypeError when - passed to hash()), and False otherwise (e.g., if object is mutable - like a list or dictionary). + passed to hash()) and passes the slice check according to 'allow_slice'. + False otherwise (e.g., if object is mutable like a list or dictionary + or if allow_slice is False and object is a slice or contains a slice). See Also -------- @@ -422,6 +431,17 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]: # Reconsider this decision once this numpy bug is fixed: # https://github.com/numpy/numpy/issues/5562 + def _contains_slice(x: object) -> bool: + # Check if object is a slice or a tuple containing a slice + if isinstance(x, tuple): + return any(isinstance(v, slice) for v in x) + elif isinstance(x, slice): + return True + return False + + if allow_slice is False and _contains_slice(obj): + return False + try: hash(obj) except TypeError: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 643974db5f2bf..70e90890cf725 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4050,7 +4050,7 @@ def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) - if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice): + if is_hashable(key, allow_slice=False) and not is_iterator(key): # is_iterator to exclude generator e.g. test_getitem_listlike # As of Python 3.12, slice is hashable which breaks MultiIndex (GH#57500) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9246309c0c7f1..5c1ec2392ad81 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -793,8 +793,7 @@ def _get_setitem_indexer(self, key): if ( isinstance(ax, MultiIndex) and self.name != "iloc" - and is_hashable(key) - and not isinstance(key, slice) + and is_hashable(key, allow_slice=False) ): with suppress(KeyError, InvalidIndexError): # TypeError e.g. passed a bool @@ -1147,8 +1146,7 @@ def _contains_slice(x: object) -> bool: # This should never be reached, but let's be explicit about it raise ValueError("Too many indices") # pragma: no cover if all( - (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x) - for x in tup + is_hashable(x, allow_slice=False) or com.is_null_slice(x) for x in tup ): # GH#10521 Series should reduce MultiIndex dimensions instead of # DataFrame, IndexingError is not raised when slice(None,None,None) @@ -1511,12 +1509,8 @@ def _convert_to_indexer(self, key, axis: AxisInt): # Slices are not valid keys passed in by the user, # even though they are hashable in Python 3.12 - contains_slice = False - if isinstance(key, tuple): - contains_slice = any(isinstance(v, slice) for v in key) - if is_scalar(key) or ( - isinstance(labels, MultiIndex) and is_hashable(key) and not contains_slice + isinstance(labels, MultiIndex) and is_hashable(key, allow_slice=False) ): # Otherwise get_loc will raise InvalidIndexError diff --git a/pandas/core/series.py b/pandas/core/series.py index a5c3bb8d51e8a..8066e32fb2927 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -953,7 +953,7 @@ def __getitem__(self, key): if is_iterator(key): key = list(key) - if is_hashable(key) and not isinstance(key, slice): + if is_hashable(key, allow_slice=False): # Otherwise index.get_value will raise InvalidIndexError try: # For labels that don't resolve as scalars like tuples and frozensets diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 5874f720e3bd0..4a7b8eee2bfce 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -213,7 +213,7 @@ def __init__( # pyright: ignore[reportInconsistentConstructor] ) try: - self._book = Workbook(self._handles.handle, **engine_kwargs) # type: ignore[arg-type] + self._book = Workbook(self._handles.handle, **engine_kwargs) except TypeError: self._handles.handle.close() raise diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d0955912e12c8..116adcb883326 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -34,6 +34,7 @@ missing as libmissing, ops as libops, ) +from pandas.compat import PY312 from pandas.compat.numpy import np_version_gt2 from pandas.errors import Pandas4Warning @@ -452,16 +453,55 @@ class UnhashableClass2: def __hash__(self): raise TypeError("Not hashable") + class HashableSlice: + def __init__(self, start, stop, step=None): + self.slice = slice(start, stop, step) + + def __eq__(self, other): + return isinstance(other, HashableSlice) and self.slice == other.slice + + def __hash__(self): + return hash((self.slice.start, self.slice.stop, self.slice.step)) + + def __repr__(self): + return ( + f"HashableSlice({self.slice.start}, {self.slice.stop}, " + f"{self.slice.step})" + ) + hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass()) not_hashable = ([], UnhashableClass1()) abc_hashable_not_really_hashable = (([],), UnhashableClass2()) + hashable_slice = HashableSlice(1, 2) + tuple_with_slice = (slice(1, 2), 3) for i in hashable: assert inference.is_hashable(i) + assert inference.is_hashable(i, allow_slice=True) + assert inference.is_hashable(i, allow_slice=False) for i in not_hashable: assert not inference.is_hashable(i) + assert not inference.is_hashable(i, allow_slice=True) + assert not inference.is_hashable(i, allow_slice=False) for i in abc_hashable_not_really_hashable: assert not inference.is_hashable(i) + assert not inference.is_hashable(i, allow_slice=True) + assert not inference.is_hashable(i, allow_slice=False) + + assert inference.is_hashable(hashable_slice) + assert inference.is_hashable(hashable_slice, allow_slice=True) + assert inference.is_hashable(hashable_slice, allow_slice=False) + + if PY312: + for obj in [slice(1, 2), tuple_with_slice]: + assert inference.is_hashable(obj) + assert inference.is_hashable(obj, allow_slice=True) + assert not inference.is_hashable(obj, allow_slice=False) + else: + for obj in [slice(1, 2), tuple_with_slice]: + assert not inference.is_hashable(obj) + assert not inference.is_hashable(obj, allow_slice=True) + assert not inference.is_hashable(obj, allow_slice=False) # numpy.array is no longer collections.abc.Hashable as of # https://github.com/numpy/numpy/pull/5326, just test