feat: initial commit - Phase 1 & 2 core features

hiderfong
2026-04-22 17:07:33 +08:00
commit 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,122 @@
"""
The :mod:`sklearn.utils` module includes various utilities.
"""
import warnings
from collections.abc import Sequence
import numpy as np
from ..exceptions import DataConversionWarning
from . import _joblib, metadata_routing
from ._bunch import Bunch
from ._chunking import gen_batches, gen_even_slices
from ._estimator_html_repr import estimator_html_repr
# Make _safe_indexing importable from here for backward compat as this particular
# helper is considered semi-private and typically very useful for third-party
# libraries that want to comply with scikit-learn's estimator API. In particular,
# _safe_indexing was included in our public API documentation despite the leading
# `_` in its name.
from ._indexing import (
_safe_indexing, # noqa
resample,
shuffle,
)
from ._mask import safe_mask
from .class_weight import compute_class_weight, compute_sample_weight
from .deprecation import deprecated
from .discovery import all_estimators
from .extmath import safe_sqr
from .murmurhash import murmurhash3_32
from .validation import (
as_float_array,
assert_all_finite,
check_array,
check_consistent_length,
check_random_state,
check_scalar,
check_symmetric,
check_X_y,
column_or_1d,
indexable,
)
# TODO(1.7): remove parallel_backend and register_parallel_backend
msg = "deprecated in 1.5 to be removed in 1.7. Use joblib.{} instead."
register_parallel_backend = deprecated(msg)(_joblib.register_parallel_backend)
# `deprecated` applied to a class modifies it in place, which would alter the object in the _joblib module, so we subclass it instead
@deprecated(msg)
class parallel_backend(_joblib.parallel_backend):
pass
__all__ = [
"murmurhash3_32",
"as_float_array",
"assert_all_finite",
"check_array",
"check_random_state",
"compute_class_weight",
"compute_sample_weight",
"column_or_1d",
"check_consistent_length",
"check_X_y",
"check_scalar",
"indexable",
"check_symmetric",
"deprecated",
"parallel_backend",
"register_parallel_backend",
"resample",
"shuffle",
"all_estimators",
"DataConversionWarning",
"estimator_html_repr",
"Bunch",
"metadata_routing",
"safe_sqr",
"safe_mask",
"gen_batches",
"gen_even_slices",
]
# TODO(1.7): remove
def __getattr__(name):
if name == "IS_PYPY":
warnings.warn(
"IS_PYPY is deprecated and will be removed in 1.7.",
FutureWarning,
)
from .fixes import _IS_PYPY
return _IS_PYPY
raise AttributeError(f"module {__name__} has no attribute {name}")
# TODO(1.7): remove tosequence
@deprecated("tosequence was deprecated in 1.5 and will be removed in 1.7")
def tosequence(x):
"""Cast iterable x to a Sequence, avoiding a copy if possible.
Parameters
----------
x : iterable
The iterable to be converted.
Returns
-------
x : Sequence
If `x` is a NumPy array, it is returned as an `ndarray`. If `x`
is a `Sequence`, `x` is returned as-is. Otherwise, `x` is returned
cast as a list.
"""
if isinstance(x, np.ndarray):
return np.asarray(x)
elif isinstance(x, Sequence):
return x
else:
return list(x)
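# Hedged usage sketch for the (deprecated) helper above; kept as comments so that
# importing the module has no side effects:
#
#   >>> import numpy as np
#   >>> tosequence(np.array([1, 2]))   # ndarray is passed through np.asarray
#   array([1, 2])
#   >>> tosequence((1, 2))             # tuples are Sequences, returned as-is
#   (1, 2)
#   >>> tosequence(iter([1, 2]))       # any other iterable is cast to a list
#   [1, 2]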
@@ -0,0 +1,30 @@
from .validation import check_random_state
def _init_arpack_v0(size, random_state):
"""Initialize the starting vector for iteration in ARPACK functions.
Initialize a ndarray with values sampled from the uniform distribution on
[-1, 1]. This initialization model has been chosen to be consistent with
the ARPACK one as another initialization can lead to convergence issues.
Parameters
----------
size : int
The size of the eigenvalue vector to be initialized.
random_state : int, RandomState instance or None, default=None
The seed of the pseudo random number generator used to generate a
uniform distribution. If int, random_state is the seed used by the
random number generator; If RandomState instance, random_state is the
random number generator; If None, the random number generator is the
RandomState instance used by `np.random`.
Returns
-------
v0 : ndarray of shape (size,)
The initialized vector.
"""
random_state = check_random_state(random_state)
v0 = random_state.uniform(-1, 1, size)
return v0
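# Hedged usage sketch (kept as comments): the start vector is deterministic for a
# fixed seed, has the requested size and stays within [-1, 1].
#
#   >>> v0 = _init_arpack_v0(10, random_state=0)
#   >>> v0.shape
#   (10,)
#   >>> bool((v0 >= -1).all() and (v0 <= 1).all())
#   True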
@@ -0,0 +1,838 @@
"""Tools to support array_api."""
import itertools
import math
from functools import wraps
import numpy
import scipy.special as special
from .._config import get_config
from .fixes import parse_version
_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy"}
def yield_namespaces(include_numpy_namespaces=True):
"""Yield supported namespace.
This is meant to be used for testing purposes only.
Parameters
----------
include_numpy_namespaces : bool, default=True
If True, also yield numpy namespaces.
Returns
-------
array_namespace : str
The name of the Array API namespace.
"""
for array_namespace in [
# The following is used to test the array_api_compat wrapper when
# array_api_dispatch is enabled: in particular, the arrays used in the
# tests are regular numpy arrays without any "device" attribute.
"numpy",
# Stricter NumPy-based Array API implementation. The
# array_api_strict.Array instances always have a dummy "device" attribute.
"array_api_strict",
"cupy",
"cupy.array_api",
"torch",
]:
if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES:
continue
yield array_namespace
def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
"""Yield supported namespace, device, dtype tuples for testing.
Use this to test that an estimator works with all combinations.
Parameters
----------
include_numpy_namespaces : bool, default=True
If True, also yield numpy namespaces.
Returns
-------
array_namespace : str
The name of the Array API namespace.
device : str
The name of the device on which to allocate the arrays. Can be None to
indicate that the default value should be used.
dtype_name : str
The name of the data type to use for arrays. Can be None to indicate
that the default value should be used.
"""
for array_namespace in yield_namespaces(
include_numpy_namespaces=include_numpy_namespaces
):
if array_namespace == "torch":
for device, dtype in itertools.product(
("cpu", "cuda"), ("float64", "float32")
):
yield array_namespace, device, dtype
yield array_namespace, "mps", "float32"
else:
yield array_namespace, None, None
def _check_array_api_dispatch(array_api_dispatch):
"""Check that array_api_compat is installed and NumPy version is compatible.
array_api_compat follows NEP29, which has a higher minimum NumPy version than
scikit-learn.
"""
if array_api_dispatch:
try:
import array_api_compat # noqa
except ImportError:
raise ImportError(
"array_api_compat is required to dispatch arrays using the API"
" specification"
)
numpy_version = parse_version(numpy.__version__)
min_numpy_version = "1.21"
if numpy_version < parse_version(min_numpy_version):
raise ImportError(
f"NumPy must be {min_numpy_version} or newer to dispatch array using"
" the API specification"
)
def _single_array_device(array):
"""Hardware device where the array data resides on."""
if isinstance(array, (numpy.ndarray, numpy.generic)) or not hasattr(
array, "device"
):
return "cpu"
else:
return array.device
def device(*array_list, remove_none=True, remove_types=(str,)):
"""Hardware device where the array data resides on.
If the hardware device is not the same for all arrays, an error is raised.
Parameters
----------
*array_list : arrays
List of array instances from NumPy or an array API compatible library.
remove_none : bool, default=True
Whether to ignore None objects passed in array_list.
remove_types : tuple or list, default=(str,)
Types to ignore in array_list.
Returns
-------
out : device
`device` object (see the "Device Support" section of the array API spec).
"""
array_list = _remove_non_arrays(
*array_list, remove_none=remove_none, remove_types=remove_types
)
# Note that _remove_non_arrays ensures that array_list is not empty.
device_ = _single_array_device(array_list[0])
# Note: here we cannot simply use a Python `set` as it requires
# hashable members which is not guaranteed for Array API device
# objects. In particular, CuPy devices are not hashable at the
# time of writing.
for array in array_list[1:]:
device_other = _single_array_device(array)
if device_ != device_other:
raise ValueError(
f"Input arrays use different devices: {str(device_)}, "
f"{str(device_other)}"
)
return device_
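# Hedged usage sketch (kept as comments): plain NumPy arrays expose no `device`
# attribute, so they are all reported as the conventional "cpu" device.
#
#   >>> import numpy as np
#   >>> device(np.asarray([1.0]), np.asarray([2.0]))
#   'cpu'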
def size(x):
"""Return the total number of elements of x.
Parameters
----------
x : array
Array instance from NumPy or an array API compatible library.
Returns
-------
out : int
Total number of elements.
"""
return math.prod(x.shape)
def _is_numpy_namespace(xp):
"""Return True if xp is backed by NumPy."""
return xp.__name__ in _NUMPY_NAMESPACE_NAMES
def _union1d(a, b, xp):
if _is_numpy_namespace(xp):
return xp.asarray(numpy.union1d(a, b))
assert a.ndim == b.ndim == 1
return xp.unique_values(xp.concat([xp.unique_values(a), xp.unique_values(b)]))
def isdtype(dtype, kind, *, xp):
"""Returns a boolean indicating whether a provided dtype is of type "kind".
Included in version 2022.12 of the Array API spec.
https://data-apis.org/array-api/latest/API_specification/generated/array_api.isdtype.html
"""
if isinstance(kind, tuple):
return any(_isdtype_single(dtype, k, xp=xp) for k in kind)
else:
return _isdtype_single(dtype, kind, xp=xp)
def _isdtype_single(dtype, kind, *, xp):
if isinstance(kind, str):
if kind == "bool":
return dtype == xp.bool
elif kind == "signed integer":
return dtype in {xp.int8, xp.int16, xp.int32, xp.int64}
elif kind == "unsigned integer":
return dtype in {xp.uint8, xp.uint16, xp.uint32, xp.uint64}
elif kind == "integral":
return any(
_isdtype_single(dtype, k, xp=xp)
for k in ("signed integer", "unsigned integer")
)
elif kind == "real floating":
return dtype in supported_float_dtypes(xp)
elif kind == "complex floating":
# Some namespaces do not have complex dtypes, such as cupy.array_api
complex_dtypes = set()
if hasattr(xp, "complex64"):
complex_dtypes.add(xp.complex64)
if hasattr(xp, "complex128"):
complex_dtypes.add(xp.complex128)
return dtype in complex_dtypes
elif kind == "numeric":
return any(
_isdtype_single(dtype, k, xp=xp)
for k in ("integral", "real floating", "complex floating")
)
else:
raise ValueError(f"Unrecognized data type kind: {kind!r}")
else:
return dtype == kind
def supported_float_dtypes(xp):
"""Supported floating point types for the namespace.
Note: float16 is not officially part of the Array API spec at the
time of writing but scikit-learn estimators and functions can choose
to accept it when xp.float16 is defined.
https://data-apis.org/array-api/latest/API_specification/data_types.html
"""
if hasattr(xp, "float16"):
return (xp.float64, xp.float32, xp.float16)
else:
return (xp.float64, xp.float32)
def ensure_common_namespace_device(reference, *arrays):
"""Ensure that all arrays use the same namespace and device as reference.
If necessary, the arrays are moved to the same namespace and device as
the reference array.
Parameters
----------
reference : array
Reference array.
*arrays : array
Arrays to check.
Returns
-------
arrays : list
Arrays with the same namespace and device as reference.
"""
xp, is_array_api = get_namespace(reference)
if is_array_api:
device_ = device(reference)
# Move arrays to the same namespace and device as the reference array.
return [xp.asarray(a, device=device_) for a in arrays]
else:
return arrays
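# Hedged usage sketch (kept as comments): with array API dispatch disabled (the
# default), `get_namespace` reports is_array_api=False and the inputs are passed
# through unchanged.
#
#   >>> import numpy as np
#   >>> ensure_common_namespace_device(np.zeros(3), [1.0, 2.0, 3.0])
#   ([1.0, 2.0, 3.0],)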
class _ArrayAPIWrapper:
"""sklearn specific Array API compatibility wrapper
This wrapper makes it possible for scikit-learn maintainers to
deal with discrepancies between different implementations of the
Python Array API standard and its evolution over time.
The Python Array API standard specification:
https://data-apis.org/array-api/latest/
Documentation of the NumPy implementation:
https://numpy.org/neps/nep-0047-array-api-standard.html
"""
def __init__(self, array_namespace):
self._namespace = array_namespace
def __getattr__(self, name):
return getattr(self._namespace, name)
def __eq__(self, other):
return self._namespace == other._namespace
def isdtype(self, dtype, kind):
return isdtype(dtype, kind, xp=self._namespace)
def _check_device_cpu(device): # noqa
if device not in {"cpu", None}:
raise ValueError(f"Unsupported device for NumPy: {device!r}")
def _accept_device_cpu(func):
@wraps(func)
def wrapped_func(*args, **kwargs):
_check_device_cpu(kwargs.pop("device", None))
return func(*args, **kwargs)
return wrapped_func
class _NumPyAPIWrapper:
"""Array API compat wrapper for any numpy version
NumPy < 2 does not implement the namespace. NumPy 2 and later should
progressively implement more and more of the latest Array API spec but this
is still work in progress at this time.
This wrapper makes it possible to write code that uses the standard Array
API while working with any version of NumPy supported by scikit-learn.
See the `get_namespace()` public function for more details.
"""
# TODO: once scikit-learn drops support for NumPy < 2, this class can be
# removed, assuming Array API compliance of NumPy 2 is actually sufficient
# for scikit-learn's needs.
# Creation functions in spec:
# https://data-apis.org/array-api/latest/API_specification/creation_functions.html
_CREATION_FUNCS = {
"arange",
"empty",
"empty_like",
"eye",
"full",
"full_like",
"linspace",
"ones",
"ones_like",
"zeros",
"zeros_like",
}
# Data types in spec
# https://data-apis.org/array-api/latest/API_specification/data_types.html
_DTYPES = {
"int8",
"int16",
"int32",
"int64",
"uint8",
"uint16",
"uint32",
"uint64",
# XXX: float16 is not part of the Array API spec but exposed by
# some namespaces.
"float16",
"float32",
"float64",
"complex64",
"complex128",
}
def __getattr__(self, name):
attr = getattr(numpy, name)
# Support device kwargs and make sure they are on the CPU
if name in self._CREATION_FUNCS:
return _accept_device_cpu(attr)
# Convert to dtype objects
if name in self._DTYPES:
return numpy.dtype(attr)
return attr
@property
def bool(self):
return numpy.bool_
def astype(self, x, dtype, *, copy=True, casting="unsafe"):
# astype is not defined in the top level NumPy namespace
return x.astype(dtype, copy=copy, casting=casting)
def asarray(self, x, *, dtype=None, device=None, copy=None): # noqa
_check_device_cpu(device)
# Support copy in NumPy namespace
if copy is True:
return numpy.array(x, copy=True, dtype=dtype)
else:
return numpy.asarray(x, dtype=dtype)
def unique_inverse(self, x):
return numpy.unique(x, return_inverse=True)
def unique_counts(self, x):
return numpy.unique(x, return_counts=True)
def unique_values(self, x):
return numpy.unique(x)
def concat(self, arrays, *, axis=None):
return numpy.concatenate(arrays, axis=axis)
def reshape(self, x, shape, *, copy=None):
"""Gives a new shape to an array without changing its data.
The Array API specification requires shape to be a tuple.
https://data-apis.org/array-api/latest/API_specification/generated/array_api.reshape.html
"""
if not isinstance(shape, tuple):
raise TypeError(
f"shape must be a tuple, got {shape!r} of type {type(shape)}"
)
if copy is True:
x = x.copy()
return numpy.reshape(x, shape)
def isdtype(self, dtype, kind):
return isdtype(dtype, kind, xp=self)
_NUMPY_API_WRAPPER_INSTANCE = _NumPyAPIWrapper()
def _remove_non_arrays(*arrays, remove_none=True, remove_types=(str,)):
"""Filter arrays to exclude None and/or specific types.
Raise ValueError if no arrays are left after filtering.
Parameters
----------
*arrays : array objects
Array objects.
remove_none : bool, default=True
Whether to ignore None objects passed in arrays.
remove_types : tuple or list, default=(str,)
Types to ignore in the arrays.
Returns
-------
filtered_arrays : list
List of arrays with `None` values and the types in `remove_types` removed.
"""
filtered_arrays = []
remove_types = tuple(remove_types)
for array in arrays:
if remove_none and array is None:
continue
if isinstance(array, remove_types):
continue
filtered_arrays.append(array)
if not filtered_arrays:
raise ValueError(
f"At least one input array expected after filtering with {remove_none=}, "
f"remove_types=[{', '.join(t.__name__ for t in remove_types)}]. Got none. "
f"Original types: [{', '.join(type(a).__name__ for a in arrays)}]."
)
return filtered_arrays
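# Hedged usage sketch (kept as comments): `None` and strings are dropped before
# namespace/device inspection; everything else is kept in order.
#
#   >>> import numpy as np
#   >>> _remove_non_arrays(np.asarray([1]), None, "drop me")
#   [array([1])]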
def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
"""Get namespace of arrays.
Introspect `arrays` arguments and return their common Array API compatible
namespace object, if any.
See: https://numpy.org/neps/nep-0047-array-api-standard.html
If `arrays` are regular numpy arrays, an instance of the `_NumPyAPIWrapper`
compatibility wrapper is returned instead.
Namespace support is not enabled by default. To enable it, call:
sklearn.set_config(array_api_dispatch=True)
or:
with sklearn.config_context(array_api_dispatch=True):
# your code here
Otherwise an instance of the `_NumPyAPIWrapper` compatibility wrapper is
always returned, irrespective of whether the arrays implement the
`__array_namespace__` protocol or not.
Parameters
----------
*arrays : array objects
Array objects.
remove_none : bool, default=True
Whether to ignore None objects passed in arrays.
remove_types : tuple or list, default=(str,)
Types to ignore in the arrays.
xp : module, default=None
Precomputed array namespace module. When passed, typically from a caller
that has already performed inspection of its own inputs, skips array
namespace inspection.
Returns
-------
namespace : module
Namespace shared by array objects. If any of the `arrays` are not arrays,
the namespace defaults to NumPy.
is_array_api_compliant : bool
True if the arrays are containers that implement the Array API spec.
Always False when array_api_dispatch=False.
"""
array_api_dispatch = get_config()["array_api_dispatch"]
if not array_api_dispatch:
if xp is not None:
return xp, False
else:
return _NUMPY_API_WRAPPER_INSTANCE, False
if xp is not None:
return xp, True
arrays = _remove_non_arrays(
*arrays, remove_none=remove_none, remove_types=remove_types
)
_check_array_api_dispatch(array_api_dispatch)
# array-api-compat is a required dependency of scikit-learn only when
# configuring `array_api_dispatch=True`. Its import should therefore be
# protected by _check_array_api_dispatch to display an informative error
# message in case it is missing.
import array_api_compat
namespace, is_array_api_compliant = array_api_compat.get_namespace(*arrays), True
# These namespaces need additional wrapping to smooth out small differences
# between implementations
if namespace.__name__ in {"cupy.array_api"}:
namespace = _ArrayAPIWrapper(namespace)
return namespace, is_array_api_compliant
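# Hedged usage sketch (kept as comments): with `array_api_dispatch` left at its
# default (False), the NumPy compatibility wrapper is always returned; enabling
# dispatch (with array-api-compat installed) returns the inputs' own namespace.
#
#   >>> import numpy as np
#   >>> xp, is_compliant = get_namespace(np.asarray([1.0]))
#   >>> is_compliant
#   False
#   >>> xp is _NUMPY_API_WRAPPER_INSTANCE
#   True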
def get_namespace_and_device(*array_list, remove_none=True, remove_types=(str,)):
"""Combination into one single function of `get_namespace` and `device`."""
array_list = _remove_non_arrays(
*array_list, remove_none=remove_none, remove_types=remove_types
)
skip_remove_kwargs = dict(remove_none=False, remove_types=[])
return (
*get_namespace(*array_list, **skip_remove_kwargs),
device(*array_list, **skip_remove_kwargs),
)
def _expit(X, xp=None):
xp, _ = get_namespace(X, xp=xp)
if _is_numpy_namespace(xp):
return xp.asarray(special.expit(numpy.asarray(X)))
return 1.0 / (1.0 + xp.exp(-X))
def _add_to_diagonal(array, value, xp):
# Workaround for the lack of support for xp.reshape(a, shape, copy=False) in
# numpy.array_api: https://github.com/numpy/numpy/issues/23410
value = xp.asarray(value, dtype=array.dtype)
if _is_numpy_namespace(xp):
array_np = numpy.asarray(array)
array_np.flat[:: array.shape[0] + 1] += value
return xp.asarray(array_np)
elif value.ndim == 1:
for i in range(array.shape[0]):
array[i, i] += value[i]
else:
# scalar value
for i in range(array.shape[0]):
array[i, i] += value
def _find_matching_floating_dtype(*arrays, xp):
"""Find a suitable floating point dtype when computing with arrays.
If any of the arrays are floating point, return the dtype with the highest
precision by following official type promotion rules:
https://data-apis.org/array-api/latest/API_specification/type_promotion.html
If there are no floating point input arrays (all integral inputs for
instance), return the default floating point dtype for the namespace.
"""
dtyped_arrays = [a for a in arrays if hasattr(a, "dtype")]
floating_dtypes = [
a.dtype for a in dtyped_arrays if xp.isdtype(a.dtype, "real floating")
]
if floating_dtypes:
# Return the floating dtype with the highest precision:
return xp.result_type(*floating_dtypes)
# If none of the input arrays have a floating point dtype, they must be all
# integer arrays or containers of Python scalars: return the default
# floating point dtype for the namespace (implementation specific).
return xp.asarray(0.0).dtype
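# Hedged usage sketch (kept as comments, using the NumPy wrapper defined above as
# `xp`): mixed float32/float64 inputs promote to float64, and all-integer inputs
# fall back to the namespace default floating dtype.
#
#   >>> import numpy as np
#   >>> xp = _NUMPY_API_WRAPPER_INSTANCE
#   >>> _find_matching_floating_dtype(
#   ...     np.asarray([1.0], dtype=np.float32),
#   ...     np.asarray([1.0], dtype=np.float64),
#   ...     xp=xp,
#   ... )
#   dtype('float64')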
def _average(a, axis=None, weights=None, normalize=True, xp=None):
"""Partial port of np.average to support the Array API.
It does a best effort at mimicking the return dtype rule described at
https://numpy.org/doc/stable/reference/generated/numpy.average.html but
only for the common cases needed in scikit-learn.
"""
xp, _, device_ = get_namespace_and_device(a, weights)
if _is_numpy_namespace(xp):
if normalize:
return xp.asarray(numpy.average(a, axis=axis, weights=weights))
elif axis is None and weights is not None:
return xp.asarray(numpy.dot(a, weights))
a = xp.asarray(a, device=device_)
if weights is not None:
weights = xp.asarray(weights, device=device_)
if weights is not None and a.shape != weights.shape:
if axis is None:
raise TypeError(
f"Axis must be specified when the shape of a {tuple(a.shape)} and "
f"weights {tuple(weights.shape)} differ."
)
if weights.ndim != 1:
raise TypeError(
f"1D weights expected when a.shape={tuple(a.shape)} and "
f"weights.shape={tuple(weights.shape)} differ."
)
if size(weights) != a.shape[axis]:
raise ValueError(
f"Length of weights {size(weights)} not compatible with "
f" a.shape={tuple(a.shape)} and {axis=}."
)
# If weights are 1D, add singleton dimensions for broadcasting
shape = [1] * a.ndim
shape[axis] = a.shape[axis]
weights = xp.reshape(weights, shape)
if xp.isdtype(a.dtype, "complex floating"):
raise NotImplementedError(
"Complex floating point values are not supported by average."
)
if weights is not None and xp.isdtype(weights.dtype, "complex floating"):
raise NotImplementedError(
"Complex floating point values are not supported by average."
)
output_dtype = _find_matching_floating_dtype(a, weights, xp=xp)
a = xp.astype(a, output_dtype)
if weights is None:
return (xp.mean if normalize else xp.sum)(a, axis=axis)
weights = xp.astype(weights, output_dtype)
sum_ = xp.sum(xp.multiply(a, weights), axis=axis)
if not normalize:
return sum_
scale = xp.sum(weights, axis=axis)
if xp.any(scale == 0.0):
raise ZeroDivisionError("Weights sum to zero, can't be normalized")
return sum_ / scale
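# Hedged worked example (kept as comments): with a = [1, 2, 3, 4] and
# weights = [1, 1, 1, 5], the normalized result is
# (1*1 + 2*1 + 3*1 + 4*5) / (1 + 1 + 1 + 5) = 26 / 8 = 3.25, while
# normalize=False would return the raw weighted sum 26.0.
#
#   >>> import numpy as np
#   >>> float(_average(np.asarray([1.0, 2.0, 3.0, 4.0]),
#   ...                weights=np.asarray([1.0, 1.0, 1.0, 5.0])))
#   3.25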
def _nanmin(X, axis=None, xp=None):
# TODO: refactor once nan-aware reductions are standardized:
# https://github.com/data-apis/array-api/issues/621
xp, _ = get_namespace(X, xp=xp)
if _is_numpy_namespace(xp):
return xp.asarray(numpy.nanmin(X, axis=axis))
else:
mask = xp.isnan(X)
X = xp.min(xp.where(mask, xp.asarray(+xp.inf, device=device(X)), X), axis=axis)
# Replace Infs from all NaN slices with NaN again
mask = xp.all(mask, axis=axis)
if xp.any(mask):
X = xp.where(mask, xp.asarray(xp.nan), X)
return X
def _nanmax(X, axis=None, xp=None):
# TODO: refactor once nan-aware reductions are standardized:
# https://github.com/data-apis/array-api/issues/621
xp, _ = get_namespace(X, xp=xp)
if _is_numpy_namespace(xp):
return xp.asarray(numpy.nanmax(X, axis=axis))
else:
mask = xp.isnan(X)
X = xp.max(xp.where(mask, xp.asarray(-xp.inf, device=device(X)), X), axis=axis)
# Replace Infs from all NaN slices with NaN again
mask = xp.all(mask, axis=axis)
if xp.any(mask):
X = xp.where(mask, xp.asarray(xp.nan), X)
return X
def _asarray_with_order(
array, dtype=None, order=None, copy=None, *, xp=None, device=None
):
"""Helper to support the order kwarg only for NumPy-backed arrays
Memory layout parameter `order` is not exposed in the Array API standard,
however some input validation code in scikit-learn needs to work both
for classes and functions that will leverage Array API only operations
and for code that inherently relies on NumPy backed data containers with
specific memory layout constraints (e.g. our own Cython code). The
purpose of this helper is to make it possible to share code for data
container validation without memory copies for both downstream use cases:
the `order` parameter is only enforced if the input array implementation
is NumPy based, otherwise `order` is just silently ignored.
"""
xp, _ = get_namespace(array, xp=xp)
if _is_numpy_namespace(xp):
# Use NumPy API to support order
if copy is True:
array = numpy.array(array, order=order, dtype=dtype)
else:
array = numpy.asarray(array, order=order, dtype=dtype)
# At this point array is a NumPy ndarray. We convert it to an array
# container that is consistent with the input's namespace.
return xp.asarray(array)
else:
return xp.asarray(array, dtype=dtype, copy=copy, device=device)
def _ravel(array, xp=None):
"""Array API compliant version of np.ravel.
For non-NumPy namespaces, it just returns a flattened array that may or
may not be a copy.
"""
xp, _ = get_namespace(array, xp=xp)
if _is_numpy_namespace(xp):
array = numpy.asarray(array)
return xp.asarray(numpy.ravel(array, order="C"))
return xp.reshape(array, shape=(-1,))
def _convert_to_numpy(array, xp):
"""Convert X into a NumPy ndarray on the CPU."""
xp_name = xp.__name__
if xp_name in {"array_api_compat.torch", "torch"}:
return array.cpu().numpy()
elif xp_name == "cupy.array_api":
return array._array.get()
elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover
return array.get()
return numpy.asarray(array)
def _estimator_with_converted_arrays(estimator, converter):
"""Create new estimator which converting all attributes that are arrays.
The converter is called on all NumPy arrays and arrays that support the
`DLPack interface <https://dmlc.github.io/dlpack/latest/>`__.
Parameters
----------
estimator : Estimator
Estimator to convert
converter : callable
Callable that takes an array attribute and returns the converted array.
Returns
-------
new_estimator : Estimator
Converted estimator.
"""
from sklearn.base import clone
new_estimator = clone(estimator)
for key, attribute in vars(estimator).items():
if hasattr(attribute, "__dlpack__") or isinstance(attribute, numpy.ndarray):
attribute = converter(attribute)
setattr(new_estimator, key, attribute)
return new_estimator
def _atol_for_type(dtype):
"""Return the absolute tolerance for a given numpy dtype."""
return numpy.finfo(dtype).eps * 100
def indexing_dtype(xp):
"""Return a platform-specific integer dtype suitable for indexing.
On 32-bit platforms, this will typically return int32 and int64 otherwise.
Note: using dtype is recommended for indexing transient array
datastructures. For long-lived arrays, such as the fitted attributes of
estimators, it is instead recommended to use platform-independent int32 if
we do not expect to index more than 2B elements. Using fixed dtypes simplifies
the handling of serialized models, e.g. to deploy a model fit on a 64-bit
platform to a target 32-bit platform such as WASM/pyodide.
"""
# Currently this is implemented with a simple hack that assumes that
# following "may be" statements in the Array API spec always hold:
# > The default integer data type should be the same across platforms, but
# > the default may vary depending on whether Python is 32-bit or 64-bit.
# > The default array index data type may be int32 on 32-bit platforms, but
# > the default should be int64 otherwise.
# https://data-apis.org/array-api/latest/API_specification/data_types.html#default-data-types
# TODO: once sufficiently adopted, we might want to instead rely on the
# newer inspection API: https://github.com/data-apis/array-api/issues/640
return xp.asarray(0).dtype
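# Hedged usage sketch (kept as comments): with the NumPy wrapper defined above
# this resolves to NumPy's default integer dtype, typically int64 on 64-bit
# platforms and int32 on 32-bit ones.
#
#   >>> indexing_dtype(_NUMPY_API_WRAPPER_INSTANCE).kind
#   'i'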
@@ -0,0 +1,93 @@
from functools import update_wrapper, wraps
from types import MethodType
class _AvailableIfDescriptor:
"""Implements a conditional property using the descriptor protocol.
Using this class to create a decorator will raise an ``AttributeError``
if check(self) returns a falsey value. Note that if check raises an error
this will also result in hasattr returning false.
See https://docs.python.org/3/howto/descriptor.html for an explanation of
descriptors.
"""
def __init__(self, fn, check, attribute_name):
self.fn = fn
self.check = check
self.attribute_name = attribute_name
# update the docstring of the descriptor
update_wrapper(self, fn)
def _check(self, obj, owner):
attr_err_msg = (
f"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}"
)
try:
check_result = self.check(obj)
except Exception as e:
raise AttributeError(attr_err_msg) from e
if not check_result:
raise AttributeError(attr_err_msg)
def __get__(self, obj, owner=None):
if obj is not None:
# delegate only on instances, not the classes.
# this is to allow access to the docstrings.
self._check(obj, owner=owner)
out = MethodType(self.fn, obj)
else:
# This makes it possible to use the decorated method as an unbound method,
# for instance when monkeypatching.
@wraps(self.fn)
def out(*args, **kwargs):
self._check(args[0], owner=owner)
return self.fn(*args, **kwargs)
return out
def available_if(check):
"""An attribute that is available only if check returns a truthy value.
Parameters
----------
check : callable
When passed the object with the decorated method, this should return
a truthy value if the attribute is available, and either return False
or raise an AttributeError if not available.
Returns
-------
callable
Callable makes the decorated method available if `check` returns
a truthy value, otherwise the decorated method is unavailable.
Examples
--------
>>> from sklearn.utils.metaestimators import available_if
>>> class HelloIfEven:
... def __init__(self, x):
... self.x = x
...
... def _x_is_even(self):
... return self.x % 2 == 0
...
... @available_if(_x_is_even)
... def say_hello(self):
... print("Hello")
...
>>> obj = HelloIfEven(1)
>>> hasattr(obj, "say_hello")
False
>>> obj.x = 2
>>> hasattr(obj, "say_hello")
True
>>> obj.say_hello()
Hello
"""
return lambda fn: _AvailableIfDescriptor(fn, check, attribute_name=fn.__name__)
@@ -0,0 +1,67 @@
import warnings
class Bunch(dict):
"""Container object exposing keys as attributes.
Bunch objects are sometimes used as an output for functions and methods.
They extend dictionaries by enabling values to be accessed by key,
`bunch["value_key"]`, or by an attribute, `bunch.value_key`.
Examples
--------
>>> from sklearn.utils import Bunch
>>> b = Bunch(a=1, b=2)
>>> b['b']
2
>>> b.b
2
>>> b.a = 3
>>> b['a']
3
>>> b.c = 6
>>> b['c']
6
"""
def __init__(self, **kwargs):
super().__init__(kwargs)
# Map from deprecated key to warning message
self.__dict__["_deprecated_key_to_warnings"] = {}
def __getitem__(self, key):
if key in self.__dict__.get("_deprecated_key_to_warnings", {}):
warnings.warn(
self._deprecated_key_to_warnings[key],
FutureWarning,
)
return super().__getitem__(key)
def _set_deprecated(self, value, *, new_key, deprecated_key, warning_message):
"""Set key in dictionary to be deprecated with its warning message."""
self.__dict__["_deprecated_key_to_warnings"][deprecated_key] = warning_message
self[new_key] = self[deprecated_key] = value
def __setattr__(self, key, value):
self[key] = value
def __dir__(self):
return self.keys()
def __getattr__(self, key):
try:
return self[key]
except KeyError:
raise AttributeError(key)
def __setstate__(self, state):
# Bunch pickles generated with scikit-learn 0.16.* have a non-empty
# __dict__. This causes a surprising behaviour when loading these
# pickles with scikit-learn 0.17: reading bunch.key uses __dict__ but
# assigning to bunch.key uses __setattr__ and only changes
# bunch['key']. More details can be found at:
# https://github.com/scikit-learn/scikit-learn/issues/6196.
# Overriding __setstate__ to be a noop has the effect of
# ignoring the pickled __dict__
pass
@@ -0,0 +1,175 @@
import warnings
from itertools import islice
from numbers import Integral
import numpy as np
from .._config import get_config
from ._param_validation import Interval, validate_params
def chunk_generator(gen, chunksize):
"""Chunk generator, ``gen`` into lists of length ``chunksize``. The last
chunk may have a length less than ``chunksize``."""
while True:
chunk = list(islice(gen, chunksize))
if chunk:
yield chunk
else:
return
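# Hedged usage sketch (kept as comments): only the final chunk may be shorter
# than ``chunksize``.
#
#   >>> list(chunk_generator(iter(range(7)), 3))
#   [[0, 1, 2], [3, 4, 5], [6]]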
@validate_params(
{
"n": [Interval(Integral, 1, None, closed="left")],
"batch_size": [Interval(Integral, 1, None, closed="left")],
"min_batch_size": [Interval(Integral, 0, None, closed="left")],
},
prefer_skip_nested_validation=True,
)
def gen_batches(n, batch_size, *, min_batch_size=0):
"""Generator to create slices containing `batch_size` elements from 0 to `n`.
The last slice may contain less than `batch_size` elements, when
`batch_size` does not divide `n`.
Parameters
----------
n : int
Size of the sequence.
batch_size : int
Number of elements in each batch.
min_batch_size : int, default=0
Minimum number of elements in each batch.
Yields
------
slice of `batch_size` elements
See Also
--------
gen_even_slices: Generator to create n_packs slices going up to n.
Examples
--------
>>> from sklearn.utils import gen_batches
>>> list(gen_batches(7, 3))
[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
>>> list(gen_batches(6, 3))
[slice(0, 3, None), slice(3, 6, None)]
>>> list(gen_batches(2, 3))
[slice(0, 2, None)]
>>> list(gen_batches(7, 3, min_batch_size=0))
[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
>>> list(gen_batches(7, 3, min_batch_size=2))
[slice(0, 3, None), slice(3, 7, None)]
"""
start = 0
for _ in range(int(n // batch_size)):
end = start + batch_size
if end + min_batch_size > n:
continue
yield slice(start, end)
start = end
if start < n:
yield slice(start, n)
@validate_params(
{
"n": [Interval(Integral, 1, None, closed="left")],
"n_packs": [Interval(Integral, 1, None, closed="left")],
"n_samples": [Interval(Integral, 1, None, closed="left"), None],
},
prefer_skip_nested_validation=True,
)
def gen_even_slices(n, n_packs, *, n_samples=None):
"""Generator to create `n_packs` evenly spaced slices going up to `n`.
If `n_packs` does not divide `n`, except for the first `n % n_packs`
slices, remaining slices may contain fewer elements.
Parameters
----------
n : int
Size of the sequence.
n_packs : int
Number of slices to generate.
n_samples : int, default=None
Number of samples. Pass `n_samples` when the slices are to be used for
sparse matrix indexing; slicing off-the-end raises an exception, while
it works for NumPy arrays.
Yields
------
`slice` representing a set of indices from 0 to n.
See Also
--------
gen_batches: Generator to create slices containing batch_size elements
from 0 to n.
Examples
--------
>>> from sklearn.utils import gen_even_slices
>>> list(gen_even_slices(10, 1))
[slice(0, 10, None)]
>>> list(gen_even_slices(10, 10))
[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]
>>> list(gen_even_slices(10, 5))
[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]
>>> list(gen_even_slices(10, 3))
[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]
"""
start = 0
for pack_num in range(n_packs):
this_n = n // n_packs
if pack_num < n % n_packs:
this_n += 1
if this_n > 0:
end = start + this_n
if n_samples is not None:
end = min(n_samples, end)
yield slice(start, end, None)
start = end
def get_chunk_n_rows(row_bytes, *, max_n_rows=None, working_memory=None):
"""Calculate how many rows can be processed within `working_memory`.
Parameters
----------
row_bytes : int
The expected number of bytes of memory that will be consumed
during the processing of each row.
max_n_rows : int, default=None
The maximum return value.
working_memory : int or float, default=None
The amount of memory, in MiB, that the rows returned may occupy in
total. When None (default), the value of
``sklearn.get_config()['working_memory']`` is used.
Returns
-------
int
The number of rows which can be processed within `working_memory`.
Warns
-----
Issues a UserWarning if `row_bytes` exceeds `working_memory` MiB.
"""
if working_memory is None:
working_memory = get_config()["working_memory"]
chunk_n_rows = int(working_memory * (2**20) // row_bytes)
if max_n_rows is not None:
chunk_n_rows = min(chunk_n_rows, max_n_rows)
if chunk_n_rows < 1:
warnings.warn(
"Could not adhere to working_memory config. "
"Currently %.0fMiB, %.0fMiB required."
% (working_memory, np.ceil(row_bytes * 2**-20))
)
chunk_n_rows = 1
return chunk_n_rows
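# Hedged worked example (kept as comments): for rows of 10,000 float64 features,
# row_bytes = 8 * 10,000 = 80,000 bytes, and with working_memory=1024 (MiB) the
# chunk size is 1024 * 2**20 // 80,000 = 13,421 rows.
#
#   >>> get_chunk_n_rows(row_bytes=80_000, working_memory=1024)
#   13421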
@@ -0,0 +1,41 @@
from cython cimport floating
cpdef enum BLAS_Order:
RowMajor # C contiguous
ColMajor # Fortran contiguous
cpdef enum BLAS_Trans:
NoTrans = 110 # corresponds to 'n'
Trans = 116 # corresponds to 't'
# BLAS Level 1 ################################################################
cdef floating _dot(int, const floating*, int, const floating*, int) noexcept nogil
cdef floating _asum(int, const floating*, int) noexcept nogil
cdef void _axpy(int, floating, const floating*, int, floating*, int) noexcept nogil
cdef floating _nrm2(int, const floating*, int) noexcept nogil
cdef void _copy(int, const floating*, int, const floating*, int) noexcept nogil
cdef void _scal(int, floating, const floating*, int) noexcept nogil
cdef void _rotg(floating*, floating*, floating*, floating*) noexcept nogil
cdef void _rot(int, floating*, int, floating*, int, floating, floating) noexcept nogil
# BLAS Level 2 ################################################################
cdef void _gemv(BLAS_Order, BLAS_Trans, int, int, floating, const floating*, int,
const floating*, int, floating, floating*, int) noexcept nogil
cdef void _ger(BLAS_Order, int, int, floating, const floating*, int, const floating*,
int, floating*, int) noexcept nogil
# BLAS Level 3 ###############################################################
cdef void _gemm(BLAS_Order, BLAS_Trans, BLAS_Trans, int, int, int, floating,
const floating*, int, const floating*, int, floating, floating*,
int) noexcept nogil
@@ -0,0 +1,233 @@
from cython cimport floating
from scipy.linalg.cython_blas cimport sdot, ddot
from scipy.linalg.cython_blas cimport sasum, dasum
from scipy.linalg.cython_blas cimport saxpy, daxpy
from scipy.linalg.cython_blas cimport snrm2, dnrm2
from scipy.linalg.cython_blas cimport scopy, dcopy
from scipy.linalg.cython_blas cimport sscal, dscal
from scipy.linalg.cython_blas cimport srotg, drotg
from scipy.linalg.cython_blas cimport srot, drot
from scipy.linalg.cython_blas cimport sgemv, dgemv
from scipy.linalg.cython_blas cimport sger, dger
from scipy.linalg.cython_blas cimport sgemm, dgemm
################
# BLAS Level 1 #
################
cdef floating _dot(int n, const floating *x, int incx,
const floating *y, int incy) noexcept nogil:
"""x.T.y"""
if floating is float:
return sdot(&n, <float *> x, &incx, <float *> y, &incy)
else:
return ddot(&n, <double *> x, &incx, <double *> y, &incy)
cpdef _dot_memview(const floating[::1] x, const floating[::1] y):
return _dot(x.shape[0], &x[0], 1, &y[0], 1)
cdef floating _asum(int n, const floating *x, int incx) noexcept nogil:
"""sum(|x_i|)"""
if floating is float:
return sasum(&n, <float *> x, &incx)
else:
return dasum(&n, <double *> x, &incx)
cpdef _asum_memview(const floating[::1] x):
return _asum(x.shape[0], &x[0], 1)
cdef void _axpy(int n, floating alpha, const floating *x, int incx,
floating *y, int incy) noexcept nogil:
"""y := alpha * x + y"""
if floating is float:
saxpy(&n, &alpha, <float *> x, &incx, y, &incy)
else:
daxpy(&n, &alpha, <double *> x, &incx, y, &incy)
cpdef _axpy_memview(floating alpha, const floating[::1] x, floating[::1] y):
_axpy(x.shape[0], alpha, &x[0], 1, &y[0], 1)
cdef floating _nrm2(int n, const floating *x, int incx) noexcept nogil:
"""sqrt(sum((x_i)^2))"""
if floating is float:
return snrm2(&n, <float *> x, &incx)
else:
return dnrm2(&n, <double *> x, &incx)
cpdef _nrm2_memview(const floating[::1] x):
return _nrm2(x.shape[0], &x[0], 1)
cdef void _copy(int n, const floating *x, int incx, const floating *y, int incy) noexcept nogil:
"""y := x"""
if floating is float:
scopy(&n, <float *> x, &incx, <float *> y, &incy)
else:
dcopy(&n, <double *> x, &incx, <double *> y, &incy)
cpdef _copy_memview(const floating[::1] x, const floating[::1] y):
_copy(x.shape[0], &x[0], 1, &y[0], 1)
cdef void _scal(int n, floating alpha, const floating *x, int incx) noexcept nogil:
"""x := alpha * x"""
if floating is float:
sscal(&n, &alpha, <float *> x, &incx)
else:
dscal(&n, &alpha, <double *> x, &incx)
cpdef _scal_memview(floating alpha, const floating[::1] x):
_scal(x.shape[0], alpha, &x[0], 1)
cdef void _rotg(floating *a, floating *b, floating *c, floating *s) noexcept nogil:
"""Generate plane rotation"""
if floating is float:
srotg(a, b, c, s)
else:
drotg(a, b, c, s)
cpdef _rotg_memview(floating a, floating b, floating c, floating s):
_rotg(&a, &b, &c, &s)
return a, b, c, s
cdef void _rot(int n, floating *x, int incx, floating *y, int incy,
floating c, floating s) noexcept nogil:
"""Apply plane rotation"""
if floating is float:
srot(&n, x, &incx, y, &incy, &c, &s)
else:
drot(&n, x, &incx, y, &incy, &c, &s)
cpdef _rot_memview(floating[::1] x, floating[::1] y, floating c, floating s):
_rot(x.shape[0], &x[0], 1, &y[0], 1, c, s)
################
# BLAS Level 2 #
################
cdef void _gemv(BLAS_Order order, BLAS_Trans ta, int m, int n, floating alpha,
const floating *A, int lda, const floating *x, int incx,
floating beta, floating *y, int incy) noexcept nogil:
"""y := alpha * op(A).x + beta * y"""
cdef char ta_ = ta
if order == RowMajor:
ta_ = NoTrans if ta == Trans else Trans
if floating is float:
sgemv(&ta_, &n, &m, &alpha, <float *> A, &lda, <float *> x,
&incx, &beta, y, &incy)
else:
dgemv(&ta_, &n, &m, &alpha, <double *> A, &lda, <double *> x,
&incx, &beta, y, &incy)
else:
if floating is float:
sgemv(&ta_, &m, &n, &alpha, <float *> A, &lda, <float *> x,
&incx, &beta, y, &incy)
else:
dgemv(&ta_, &m, &n, &alpha, <double *> A, &lda, <double *> x,
&incx, &beta, y, &incy)
cpdef _gemv_memview(BLAS_Trans ta, floating alpha, const floating[:, :] A,
const floating[::1] x, floating beta, floating[::1] y):
cdef:
int m = A.shape[0]
int n = A.shape[1]
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
int lda = m if order == ColMajor else n
_gemv(order, ta, m, n, alpha, &A[0, 0], lda, &x[0], 1, beta, &y[0], 1)
cdef void _ger(BLAS_Order order, int m, int n, floating alpha,
const floating *x, int incx, const floating *y,
int incy, floating *A, int lda) noexcept nogil:
"""A := alpha * x.y.T + A"""
if order == RowMajor:
if floating is float:
sger(&n, &m, &alpha, <float *> y, &incy, <float *> x, &incx, A, &lda)
else:
dger(&n, &m, &alpha, <double *> y, &incy, <double *> x, &incx, A, &lda)
else:
if floating is float:
sger(&m, &n, &alpha, <float *> x, &incx, <float *> y, &incy, A, &lda)
else:
dger(&m, &n, &alpha, <double *> x, &incx, <double *> y, &incy, A, &lda)
cpdef _ger_memview(floating alpha, const floating[::1] x,
const floating[::1] y, floating[:, :] A):
cdef:
int m = A.shape[0]
int n = A.shape[1]
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
int lda = m if order == ColMajor else n
_ger(order, m, n, alpha, &x[0], 1, &y[0], 1, &A[0, 0], lda)
################
# BLAS Level 3 #
################
cdef void _gemm(BLAS_Order order, BLAS_Trans ta, BLAS_Trans tb, int m, int n,
int k, floating alpha, const floating *A, int lda, const floating *B,
int ldb, floating beta, floating *C, int ldc) noexcept nogil:
"""C := alpha * op(A).op(B) + beta * C"""
# TODO: Remove the pointer casts below once SciPy uses const-qualification.
# See: https://github.com/scipy/scipy/issues/14262
cdef:
char ta_ = ta
char tb_ = tb
if order == RowMajor:
if floating is float:
sgemm(&tb_, &ta_, &n, &m, &k, &alpha, <float*>B,
&ldb, <float*>A, &lda, &beta, C, &ldc)
else:
dgemm(&tb_, &ta_, &n, &m, &k, &alpha, <double*>B,
&ldb, <double*>A, &lda, &beta, C, &ldc)
else:
if floating is float:
sgemm(&ta_, &tb_, &m, &n, &k, &alpha, <float*>A,
&lda, <float*>B, &ldb, &beta, C, &ldc)
else:
dgemm(&ta_, &tb_, &m, &n, &k, &alpha, <double*>A,
&lda, <double*>B, &ldb, &beta, C, &ldc)
cpdef _gemm_memview(BLAS_Trans ta, BLAS_Trans tb, floating alpha,
const floating[:, :] A, const floating[:, :] B, floating beta,
floating[:, :] C):
cdef:
int m = A.shape[0] if ta == NoTrans else A.shape[1]
int n = B.shape[1] if tb == NoTrans else B.shape[0]
int k = A.shape[1] if ta == NoTrans else A.shape[0]
int lda, ldb, ldc
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
if order == RowMajor:
lda = k if ta == NoTrans else m
ldb = n if tb == NoTrans else k
ldc = n
else:
lda = m if ta == NoTrans else k
ldb = k if tb == NoTrans else n
ldc = m
_gemm(order, ta, tb, m, n, k, alpha, &A[0, 0],
lda, &B[0, 0], ldb, beta, &C[0, 0], ldc)
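# Hedged usage sketch (kept as comments): the `*_memview` helpers are thin
# Python-callable wrappers around the typed BLAS routines, mainly useful for
# tests once this module is compiled. With contiguous float64 arrays:
#
#   >>> import numpy as np
#   >>> _dot_memview(np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0]))
#   32.0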
@@ -0,0 +1,367 @@
from collections import Counter
from contextlib import suppress
from typing import NamedTuple
import numpy as np
from ._missing import is_scalar_nan
def _unique(values, *, return_inverse=False, return_counts=False):
"""Helper function to find unique values with support for python objects.
Uses pure python method for object dtype, and numpy method for
all other dtypes.
Parameters
----------
values : ndarray
Values to check for unknowns.
return_inverse : bool, default=False
If True, also return the indices of the unique values.
return_counts : bool, default=False
If True, also return the number of times each unique item appears in
values.
Returns
-------
unique : ndarray
The sorted unique values.
unique_inverse : ndarray
The indices to reconstruct the original array from the unique array.
Only provided if `return_inverse` is True.
unique_counts : ndarray
The number of times each of the unique values comes up in the original
array. Only provided if `return_counts` is True.
"""
if values.dtype == object:
return _unique_python(
values, return_inverse=return_inverse, return_counts=return_counts
)
# numerical
return _unique_np(
values, return_inverse=return_inverse, return_counts=return_counts
)
def _unique_np(values, return_inverse=False, return_counts=False):
"""Helper function to find unique values for numpy arrays that correctly
accounts for nans. See `_unique` documentation for details."""
uniques = np.unique(
values, return_inverse=return_inverse, return_counts=return_counts
)
inverse, counts = None, None
if return_counts:
*uniques, counts = uniques
if return_inverse:
*uniques, inverse = uniques
if return_counts or return_inverse:
uniques = uniques[0]
# np.unique will have duplicate missing values at the end of `uniques`
# here we clip the nans and remove them from uniques
if uniques.size and is_scalar_nan(uniques[-1]):
nan_idx = np.searchsorted(uniques, np.nan)
uniques = uniques[: nan_idx + 1]
if return_inverse:
inverse[inverse > nan_idx] = nan_idx
if return_counts:
counts[nan_idx] = np.sum(counts[nan_idx:])
counts = counts[: nan_idx + 1]
ret = (uniques,)
if return_inverse:
ret += (inverse,)
if return_counts:
ret += (counts,)
return ret[0] if len(ret) == 1 else ret
class MissingValues(NamedTuple):
"""Data class for missing data information"""
nan: bool
none: bool
def to_list(self):
"""Convert tuple to a list where None is always first."""
output = []
if self.none:
output.append(None)
if self.nan:
output.append(np.nan)
return output
def _extract_missing(values):
"""Extract missing values from `values`.
Parameters
----------
values: set
Set of values to extract missing from.
Returns
-------
output: set
Set with missing values extracted.
missing_values: MissingValues
Object with missing value information.
"""
missing_values_set = {
value for value in values if value is None or is_scalar_nan(value)
}
if not missing_values_set:
return values, MissingValues(nan=False, none=False)
if None in missing_values_set:
if len(missing_values_set) == 1:
output_missing_values = MissingValues(nan=False, none=True)
else:
# If there is more than one missing value, then it has to be
# float('nan') or np.nan
output_missing_values = MissingValues(nan=True, none=True)
else:
output_missing_values = MissingValues(nan=True, none=False)
# create set without the missing values
output = values - missing_values_set
return output, output_missing_values
class _nandict(dict):
"""Dictionary with support for nans."""
def __init__(self, mapping):
super().__init__(mapping)
for key, value in mapping.items():
if is_scalar_nan(key):
self.nan_value = value
break
def __missing__(self, key):
if hasattr(self, "nan_value") and is_scalar_nan(key):
return self.nan_value
raise KeyError(key)
def _map_to_integer(values, uniques):
"""Map values based on its position in uniques."""
table = _nandict({val: i for i, val in enumerate(uniques)})
return np.array([table[v] for v in values])
def _unique_python(values, *, return_inverse, return_counts):
# Only used in `_unique`, see docstring there for details
try:
uniques_set = set(values)
uniques_set, missing_values = _extract_missing(uniques_set)
uniques = sorted(uniques_set)
uniques.extend(missing_values.to_list())
uniques = np.array(uniques, dtype=values.dtype)
except TypeError:
types = sorted(t.__qualname__ for t in set(type(v) for v in values))
raise TypeError(
"Encoders require their input argument must be uniformly "
f"strings or numbers. Got {types}"
)
ret = (uniques,)
if return_inverse:
ret += (_map_to_integer(values, uniques),)
if return_counts:
ret += (_get_counts(values, uniques),)
return ret[0] if len(ret) == 1 else ret
def _encode(values, *, uniques, check_unknown=True):
"""Helper function to encode values into [0, n_uniques - 1].
Uses pure python method for object dtype, and numpy method for
all other dtypes.
The numpy method has the limitation that the `uniques` need to
be sorted. Importantly, this is not checked but assumed to already be
the case. The calling method needs to ensure this for all non-object
values.
Parameters
----------
values : ndarray
Values to encode.
uniques : ndarray
The unique values in `values`. If the dtype is not object, then
`uniques` needs to be sorted.
check_unknown : bool, default=True
If True, check for values in `values` that are not in `uniques`
and raise an error. This is ignored for object dtype, and treated as
True in this case. This parameter is useful for
_BaseEncoder._transform() to avoid calling _check_unknown()
twice.
Returns
-------
encoded : ndarray
Encoded values
"""
if values.dtype.kind in "OUS":
try:
return _map_to_integer(values, uniques)
except KeyError as e:
raise ValueError(f"y contains previously unseen labels: {str(e)}")
else:
if check_unknown:
diff = _check_unknown(values, uniques)
if diff:
raise ValueError(f"y contains previously unseen labels: {str(diff)}")
return np.searchsorted(uniques, values)
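# Hedged usage sketch (kept as comments): `uniques` typically comes from
# `_unique` above, and `_encode` maps every value to its position in that
# sorted vocabulary.
#
#   >>> import numpy as np
#   >>> values = np.array(["b", "a", "b"], dtype=object)
#   >>> uniques = _unique(values)
#   >>> uniques
#   array(['a', 'b'], dtype=object)
#   >>> _encode(values, uniques=uniques)
#   array([1, 0, 1])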
def _check_unknown(values, known_values, return_mask=False):
"""
Helper function to check for unknowns in values to be encoded.
Uses pure python method for object dtype, and numpy method for
all other dtypes.
Parameters
----------
values : array
Values to check for unknowns.
known_values : array
Known values. Must be unique.
return_mask : bool, default=False
If True, return a mask of the same shape as `values` indicating
the valid values.
Returns
-------
diff : list
The unique values present in `values` and not in `known_values`.
valid_mask : boolean array
Additionally returned if ``return_mask=True``.
"""
valid_mask = None
if values.dtype.kind in "OUS":
values_set = set(values)
values_set, missing_in_values = _extract_missing(values_set)
uniques_set = set(known_values)
uniques_set, missing_in_uniques = _extract_missing(uniques_set)
diff = values_set - uniques_set
nan_in_diff = missing_in_values.nan and not missing_in_uniques.nan
none_in_diff = missing_in_values.none and not missing_in_uniques.none
def is_valid(value):
return (
value in uniques_set
or missing_in_uniques.none
and value is None
or missing_in_uniques.nan
and is_scalar_nan(value)
)
if return_mask:
if diff or nan_in_diff or none_in_diff:
valid_mask = np.array([is_valid(value) for value in values])
else:
valid_mask = np.ones(len(values), dtype=bool)
diff = list(diff)
if none_in_diff:
diff.append(None)
if nan_in_diff:
diff.append(np.nan)
else:
unique_values = np.unique(values)
diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
if return_mask:
if diff.size:
valid_mask = np.isin(values, known_values)
else:
valid_mask = np.ones(len(values), dtype=bool)
# check for nans in the known_values
if np.isnan(known_values).any():
diff_is_nan = np.isnan(diff)
if diff_is_nan.any():
# removes nan from valid_mask
if diff.size and return_mask:
is_nan = np.isnan(values)
valid_mask[is_nan] = 1
# remove nan from diff
diff = diff[~diff_is_nan]
diff = list(diff)
if return_mask:
return diff, valid_mask
return diff
class _NaNCounter(Counter):
"""Counter with support for nan values."""
def __init__(self, items):
super().__init__(self._generate_items(items))
def _generate_items(self, items):
"""Generate items without nans. Stores the nan counts separately."""
for item in items:
if not is_scalar_nan(item):
yield item
continue
if not hasattr(self, "nan_count"):
self.nan_count = 0
self.nan_count += 1
def __missing__(self, key):
if hasattr(self, "nan_count") and is_scalar_nan(key):
return self.nan_count
raise KeyError(key)
def _get_counts(values, uniques):
"""Get the count of each of the `uniques` in `values`.
The counts will use the order passed in by `uniques`. For non-object dtypes,
`uniques` is assumed to be sorted and `np.nan` is at the end.
"""
if values.dtype.kind in "OU":
counter = _NaNCounter(values)
output = np.zeros(len(uniques), dtype=np.int64)
for i, item in enumerate(uniques):
with suppress(KeyError):
output[i] = counter[item]
return output
unique_values, counts = _unique_np(values, return_counts=True)
# Reorder unique_values based on input: `uniques`
uniques_in_values = np.isin(uniques, unique_values, assume_unique=True)
if np.isnan(unique_values[-1]) and np.isnan(uniques[-1]):
uniques_in_values[-1] = True
unique_valid_indices = np.searchsorted(unique_values, uniques[uniques_in_values])
output = np.zeros_like(uniques, dtype=np.int64)
output[uniques_in_values] = counts[unique_valid_indices]
return output
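# Hedged usage sketch (kept as comments): counts follow the order of `uniques`,
# including categories that never occur in `values`.
#
#   >>> import numpy as np
#   >>> values = np.array(["a", "b", "a"], dtype=object)
#   >>> _get_counts(values, np.array(["a", "b", "c"], dtype=object))
#   array([2, 1, 0])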
@@ -0,0 +1,404 @@
#$id {
/* Definition of color scheme common for light and dark mode */
--sklearn-color-text: black;
--sklearn-color-line: gray;
/* Definition of color scheme for unfitted estimators */
--sklearn-color-unfitted-level-0: #fff5e6;
--sklearn-color-unfitted-level-1: #f6e4d2;
--sklearn-color-unfitted-level-2: #ffe0b3;
--sklearn-color-unfitted-level-3: chocolate;
/* Definition of color scheme for fitted estimators */
--sklearn-color-fitted-level-0: #f0f8ff;
--sklearn-color-fitted-level-1: #d4ebff;
--sklearn-color-fitted-level-2: #b3dbfd;
--sklearn-color-fitted-level-3: cornflowerblue;
/* Specific color for light theme */
--sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));
--sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));
--sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));
--sklearn-color-icon: #696969;
@media (prefers-color-scheme: dark) {
/* Redefinition of color scheme for dark theme */
--sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));
--sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));
--sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));
--sklearn-color-icon: #878787;
}
}
#$id {
color: var(--sklearn-color-text);
}
#$id pre {
padding: 0;
}
#$id input.sk-hidden--visually {
border: 0;
clip: rect(1px 1px 1px 1px);
clip: rect(1px, 1px, 1px, 1px);
height: 1px;
margin: -1px;
overflow: hidden;
padding: 0;
position: absolute;
width: 1px;
}
#$id div.sk-dashed-wrapped {
border: 1px dashed var(--sklearn-color-line);
margin: 0 0.4em 0.5em 0.4em;
box-sizing: border-box;
padding-bottom: 0.4em;
background-color: var(--sklearn-color-background);
}
#$id div.sk-container {
/* jupyter's `normalize.less` sets `[hidden] { display: none; }`
but bootstrap.min.css set `[hidden] { display: none !important; }`
so we also need the `!important` here to be able to override the
default hidden behavior on the sphinx rendered scikit-learn.org.
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */
display: inline-block !important;
position: relative;
}
#$id div.sk-text-repr-fallback {
display: none;
}
div.sk-parallel-item,
div.sk-serial,
div.sk-item {
/* draw centered vertical line to link estimators */
background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));
background-size: 2px 100%;
background-repeat: no-repeat;
background-position: center center;
}
/* Parallel-specific style estimator block */
#$id div.sk-parallel-item::after {
content: "";
width: 100%;
border-bottom: 2px solid var(--sklearn-color-text-on-default-background);
flex-grow: 1;
}
#$id div.sk-parallel {
display: flex;
align-items: stretch;
justify-content: center;
background-color: var(--sklearn-color-background);
position: relative;
}
#$id div.sk-parallel-item {
display: flex;
flex-direction: column;
}
#$id div.sk-parallel-item:first-child::after {
align-self: flex-end;
width: 50%;
}
#$id div.sk-parallel-item:last-child::after {
align-self: flex-start;
width: 50%;
}
#$id div.sk-parallel-item:only-child::after {
width: 0;
}
/* Serial-specific style estimator block */
#$id div.sk-serial {
display: flex;
flex-direction: column;
align-items: center;
background-color: var(--sklearn-color-background);
padding-right: 1em;
padding-left: 1em;
}
/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is
clickable and can be expanded/collapsed.
- Pipeline and ColumnTransformer use this feature and define the default style
- Estimators will overwrite some part of the style using the `sk-estimator` class
*/
/* Pipeline and ColumnTransformer style (default) */
#$id div.sk-toggleable {
/* Default theme specific background. It is overwritten whether we have a
specific estimator or a Pipeline/ColumnTransformer */
background-color: var(--sklearn-color-background);
}
/* Toggleable label */
#$id label.sk-toggleable__label {
cursor: pointer;
display: block;
width: 100%;
margin-bottom: 0;
padding: 0.5em;
box-sizing: border-box;
text-align: center;
}
#$id label.sk-toggleable__label-arrow:before {
/* Arrow on the left of the label */
content: "▸";
float: left;
margin-right: 0.25em;
color: var(--sklearn-color-icon);
}
#$id label.sk-toggleable__label-arrow:hover:before {
color: var(--sklearn-color-text);
}
/* Toggleable content - dropdown */
#$id div.sk-toggleable__content {
max-height: 0;
max-width: 0;
overflow: hidden;
text-align: left;
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#$id div.sk-toggleable__content.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
}
#$id div.sk-toggleable__content pre {
margin: 0.2em;
border-radius: 0.25em;
color: var(--sklearn-color-text);
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#$id div.sk-toggleable__content.fitted pre {
/* unfitted */
background-color: var(--sklearn-color-fitted-level-0);
}
#$id input.sk-toggleable__control:checked~div.sk-toggleable__content {
/* Expand drop-down */
max-height: 200px;
max-width: 100%;
overflow: auto;
}
#$id input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {
content: "▾";
}
/* Pipeline/ColumnTransformer-specific style */
#$id div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-unfitted-level-2);
}
#$id div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {
background-color: var(--sklearn-color-fitted-level-2);
}
/* Estimator-specific style */
/* Colorize estimator box */
#$id div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-2);
}
#$id div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {
/* fitted */
background-color: var(--sklearn-color-fitted-level-2);
}
#$id div.sk-label label.sk-toggleable__label,
#$id div.sk-label label {
/* The background is the default theme color */
color: var(--sklearn-color-text-on-default-background);
}
/* On hover, darken the color of the background */
#$id div.sk-label:hover label.sk-toggleable__label {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-unfitted-level-2);
}
/* Label box, darken color on hover, fitted */
#$id div.sk-label.fitted:hover label.sk-toggleable__label.fitted {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-fitted-level-2);
}
/* Estimator label */
#$id div.sk-label label {
font-family: monospace;
font-weight: bold;
display: inline-block;
line-height: 1.2em;
}
#$id div.sk-label-container {
text-align: center;
}
/* Estimator-specific */
#$id div.sk-estimator {
font-family: monospace;
border: 1px dotted var(--sklearn-color-border-box);
border-radius: 0.25em;
box-sizing: border-box;
margin-bottom: 0.5em;
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#$id div.sk-estimator.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
}
/* on hover */
#$id div.sk-estimator:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-2);
}
#$id div.sk-estimator.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-2);
}
/* Specification for estimator info (e.g. "i" and "?") */
/* Common style for "i" and "?" */
.sk-estimator-doc-link,
a:link.sk-estimator-doc-link,
a:visited.sk-estimator-doc-link {
float: right;
font-size: smaller;
line-height: 1em;
font-family: monospace;
background-color: var(--sklearn-color-background);
border-radius: 1em;
height: 1em;
width: 1em;
text-decoration: none !important;
margin-left: 1ex;
/* unfitted */
border: var(--sklearn-color-unfitted-level-1) 1pt solid;
color: var(--sklearn-color-unfitted-level-1);
}
.sk-estimator-doc-link.fitted,
a:link.sk-estimator-doc-link.fitted,
a:visited.sk-estimator-doc-link.fitted {
/* fitted */
border: var(--sklearn-color-fitted-level-1) 1pt solid;
color: var(--sklearn-color-fitted-level-1);
}
/* On hover */
div.sk-estimator:hover .sk-estimator-doc-link:hover,
.sk-estimator-doc-link:hover,
div.sk-label-container:hover .sk-estimator-doc-link:hover,
.sk-estimator-doc-link:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-3);
color: var(--sklearn-color-background);
text-decoration: none;
}
div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,
.sk-estimator-doc-link.fitted:hover,
div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
.sk-estimator-doc-link.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-3);
color: var(--sklearn-color-background);
text-decoration: none;
}
/* Span, style for the box shown on hovering the info icon */
.sk-estimator-doc-link span {
display: none;
z-index: 9999;
position: relative;
font-weight: normal;
right: .2ex;
padding: .5ex;
margin: .5ex;
width: min-content;
min-width: 20ex;
max-width: 50ex;
color: var(--sklearn-color-text);
box-shadow: 2pt 2pt 4pt #999;
/* unfitted */
background: var(--sklearn-color-unfitted-level-0);
border: .5pt solid var(--sklearn-color-unfitted-level-3);
}
.sk-estimator-doc-link.fitted span {
/* fitted */
background: var(--sklearn-color-fitted-level-0);
border: var(--sklearn-color-fitted-level-3);
}
.sk-estimator-doc-link:hover span {
display: block;
}
/* "?"-specific style due to the `<a>` HTML tag */
#$id a.estimator_doc_link {
float: right;
font-size: 1rem;
line-height: 1em;
font-family: monospace;
background-color: var(--sklearn-color-background);
border-radius: 1rem;
height: 1rem;
width: 1rem;
text-decoration: none;
/* unfitted */
color: var(--sklearn-color-unfitted-level-1);
border: var(--sklearn-color-unfitted-level-1) 1pt solid;
}
#$id a.estimator_doc_link.fitted {
/* fitted */
border: var(--sklearn-color-fitted-level-1) 1pt solid;
color: var(--sklearn-color-fitted-level-1);
}
/* On hover */
#$id a.estimator_doc_link:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-3);
color: var(--sklearn-color-background);
text-decoration: none;
}
#$id a.estimator_doc_link.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-3);
}
@@ -0,0 +1,496 @@
import html
import itertools
from contextlib import closing
from inspect import isclass
from io import StringIO
from pathlib import Path
from string import Template
from .. import __version__, config_context
from .fixes import parse_version
class _IDCounter:
"""Generate sequential ids with a prefix."""
def __init__(self, prefix):
self.prefix = prefix
self.count = 0
def get_id(self):
self.count += 1
return f"{self.prefix}-{self.count}"
def _get_css_style():
return Path(__file__).with_suffix(".css").read_text(encoding="utf-8")
_CONTAINER_ID_COUNTER = _IDCounter("sk-container-id")
_ESTIMATOR_ID_COUNTER = _IDCounter("sk-estimator-id")
_CSS_STYLE = _get_css_style()
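The `$id` placeholders in the CSS loaded above are filled in with `string.Template`, as done further down in `estimator_html_repr`; a minimal sketch of that substitution:
from string import Template

css = "#$id div.sk-estimator { font-family: monospace; }"
print(Template(css).substitute(id="sk-container-id-1"))
# #sk-container-id-1 div.sk-estimator { font-family: monospace; }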
class _VisualBlock:
"""HTML Representation of Estimator
Parameters
----------
kind : {'serial', 'parallel', 'single'}
kind of HTML block
estimators : list of estimators or `_VisualBlock`s or a single estimator
If kind != 'single', then `estimators` is a list of
estimators.
If kind == 'single', then `estimators` is a single estimator.
names : list of str, default=None
If kind != 'single', then `names` corresponds to estimators.
If kind == 'single', then `names` is a single string corresponding to
the single estimator.
name_details : list of str, str, or None, default=None
If kind != 'single', then `name_details` corresponds to `names`.
If kind == 'single', then `name_details` is a single string
corresponding to the single estimator.
dash_wrapped : bool, default=True
If true, the HTML element is wrapped with a dashed border.
Only active when kind != 'single'.
"""
def __init__(
self, kind, estimators, *, names=None, name_details=None, dash_wrapped=True
):
self.kind = kind
self.estimators = estimators
self.dash_wrapped = dash_wrapped
if self.kind in ("parallel", "serial"):
if names is None:
names = (None,) * len(estimators)
if name_details is None:
name_details = (None,) * len(estimators)
self.names = names
self.name_details = name_details
def _sk_visual_block_(self):
return self
def _write_label_html(
out,
name,
name_details,
outer_class="sk-label-container",
inner_class="sk-label",
checked=False,
doc_link="",
is_fitted_css_class="",
is_fitted_icon="",
):
"""Write labeled html with or without a dropdown with named details.
Parameters
----------
out : file-like object
The file to write the HTML representation to.
name : str
The label for the estimator. It corresponds either to the estimator class name
for a simple estimator or in the case of a `Pipeline` and `ColumnTransformer`,
it corresponds to the name of the step.
name_details : str
The details to show as content in the dropdown part of the toggleable label. It
can contain information such as non-default parameters or column information for
`ColumnTransformer`.
outer_class : {"sk-label-container", "sk-item"}, default="sk-label-container"
The CSS class for the outer container.
inner_class : {"sk-label", "sk-estimator"}, default="sk-label"
The CSS class for the inner container.
checked : bool, default=False
Whether the dropdown is folded or not. With a single estimator, we intend to
unfold the content.
doc_link : str, default=""
The link to the documentation for the estimator. If an empty string, no link is
added to the diagram. This can be generated for an estimator if it uses the
`_HTMLDocumentationLinkMixin`.
is_fitted_css_class : {"", "fitted"}
The CSS class to indicate whether or not the estimator is fitted. The
empty string means that the estimator is not fitted and "fitted" means that the
estimator is fitted.
is_fitted_icon : str, default=""
The HTML representation to show the fitted information in the diagram. An empty
string means that no information is shown.
"""
# we need to add some padding to the left of the label to be sure it is centered
padding_label = "&nbsp;" if is_fitted_icon else "" # add padding for the "i" char
out.write(
f'<div class="{outer_class}"><div'
f' class="{inner_class} {is_fitted_css_class} sk-toggleable">'
)
name = html.escape(name)
if name_details is not None:
name_details = html.escape(str(name_details))
label_class = (
f"sk-toggleable__label {is_fitted_css_class} sk-toggleable__label-arrow"
)
checked_str = "checked" if checked else ""
est_id = _ESTIMATOR_ID_COUNTER.get_id()
if doc_link:
doc_label = "<span>Online documentation</span>"
if name is not None:
doc_label = f"<span>Documentation for {name}</span>"
doc_link = (
f'<a class="sk-estimator-doc-link {is_fitted_css_class}"'
f' rel="noreferrer" target="_blank" href="{doc_link}">?{doc_label}</a>'
)
padding_label += "&nbsp;" # add additional padding for the "?" char
fmt_str = (
'<input class="sk-toggleable__control sk-hidden--visually"'
f' id="{est_id}" '
f'type="checkbox" {checked_str}><label for="{est_id}" '
f'class="{label_class} {is_fitted_css_class}">{padding_label}{name}'
f"{doc_link}{is_fitted_icon}</label><div "
f'class="sk-toggleable__content {is_fitted_css_class}">'
f"<pre>{name_details}</pre></div> "
)
out.write(fmt_str)
else:
out.write(f"<label>{name}</label>")
out.write("</div></div>") # outer_class inner_class
def _get_visual_block(estimator):
"""Generate information about how to display an estimator."""
if hasattr(estimator, "_sk_visual_block_"):
try:
return estimator._sk_visual_block_()
except Exception:
return _VisualBlock(
"single",
estimator,
names=estimator.__class__.__name__,
name_details=str(estimator),
)
if isinstance(estimator, str):
return _VisualBlock(
"single", estimator, names=estimator, name_details=estimator
)
elif estimator is None:
return _VisualBlock("single", estimator, names="None", name_details="None")
# check if estimator looks like a meta estimator (wraps estimators)
if hasattr(estimator, "get_params") and not isclass(estimator):
estimators = [
(key, est)
for key, est in estimator.get_params(deep=False).items()
if hasattr(est, "get_params") and hasattr(est, "fit") and not isclass(est)
]
if estimators:
return _VisualBlock(
"parallel",
[est for _, est in estimators],
names=[f"{key}: {est.__class__.__name__}" for key, est in estimators],
name_details=[str(est) for _, est in estimators],
)
return _VisualBlock(
"single",
estimator,
names=estimator.__class__.__name__,
name_details=str(estimator),
)
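As an illustration of the dispatch above (assuming scikit-learn is importable): a bare estimator without `_sk_visual_block_` falls through to a 'single' block, while composite estimators such as `Pipeline` return their own block kind:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

print(_get_visual_block(LogisticRegression()).kind)  # 'single'
print(_get_visual_block(make_pipeline(StandardScaler(), LogisticRegression())).kind)  # 'serial'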
def _write_estimator_html(
out,
estimator,
estimator_label,
estimator_label_details,
is_fitted_css_class,
is_fitted_icon="",
first_call=False,
):
"""Write estimator to html in serial, parallel, or by itself (single).
For multiple estimators, this function is called recursively.
Parameters
----------
out : file-like object
The file to write the HTML representation to.
estimator : estimator object
The estimator to visualize.
estimator_label : str
The label for the estimator. It corresponds either to the estimator class name
for simple estimator or in the case of `Pipeline` and `ColumnTransformer`, it
corresponds to the name of the step.
estimator_label_details : str
The details to show as content in the dropdown part of the toggleable label.
It can contain information such as non-default parameters or column information for
`ColumnTransformer`.
is_fitted_css_class : {"", "fitted"}
The CSS class to indicate whether or not the estimator is fitted. The
empty string means that the estimator is not fitted and "fitted" means that the
estimator is fitted.
is_fitted_icon : str, default=""
The HTML representation to show the fitted information in the diagram. An empty
string means that no information is shown. If the estimator to be shown is not
the first estimator (i.e. `first_call=False`), `is_fitted_icon` is always an
empty string.
first_call : bool, default=False
Whether this is the first time this function is called.
"""
if first_call:
est_block = _get_visual_block(estimator)
else:
is_fitted_icon = ""
with config_context(print_changed_only=True):
est_block = _get_visual_block(estimator)
# `estimator` can also be an instance of `_VisualBlock`
if hasattr(estimator, "_get_doc_link"):
doc_link = estimator._get_doc_link()
else:
doc_link = ""
if est_block.kind in ("serial", "parallel"):
dashed_wrapped = first_call or est_block.dash_wrapped
dash_cls = " sk-dashed-wrapped" if dashed_wrapped else ""
out.write(f'<div class="sk-item{dash_cls}">')
if estimator_label:
_write_label_html(
out,
estimator_label,
estimator_label_details,
doc_link=doc_link,
is_fitted_css_class=is_fitted_css_class,
is_fitted_icon=is_fitted_icon,
)
kind = est_block.kind
out.write(f'<div class="sk-{kind}">')
est_infos = zip(est_block.estimators, est_block.names, est_block.name_details)
for est, name, name_details in est_infos:
if kind == "serial":
_write_estimator_html(
out,
est,
name,
name_details,
is_fitted_css_class=is_fitted_css_class,
)
else: # parallel
out.write('<div class="sk-parallel-item">')
# wrap element in a serial visualblock
serial_block = _VisualBlock("serial", [est], dash_wrapped=False)
_write_estimator_html(
out,
serial_block,
name,
name_details,
is_fitted_css_class=is_fitted_css_class,
)
out.write("</div>") # sk-parallel-item
out.write("</div></div>")
elif est_block.kind == "single":
_write_label_html(
out,
est_block.names,
est_block.name_details,
outer_class="sk-item",
inner_class="sk-estimator",
checked=first_call,
doc_link=doc_link,
is_fitted_css_class=is_fitted_css_class,
is_fitted_icon=is_fitted_icon,
)
def estimator_html_repr(estimator):
"""Build a HTML representation of an estimator.
Read more in the :ref:`User Guide <visualizing_composite_estimators>`.
Parameters
----------
estimator : estimator object
The estimator to visualize.
Returns
-------
html: str
HTML representation of estimator.
Examples
--------
>>> from sklearn.utils._estimator_html_repr import estimator_html_repr
>>> from sklearn.linear_model import LogisticRegression
>>> estimator_html_repr(LogisticRegression())
'<style>...</div>'
"""
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted
if not hasattr(estimator, "fit"):
status_label = "<span>Not fitted</span>"
is_fitted_css_class = ""
else:
try:
check_is_fitted(estimator)
status_label = "<span>Fitted</span>"
is_fitted_css_class = "fitted"
except NotFittedError:
status_label = "<span>Not fitted</span>"
is_fitted_css_class = ""
is_fitted_icon = (
f'<span class="sk-estimator-doc-link {is_fitted_css_class}">'
f"i{status_label}</span>"
)
with closing(StringIO()) as out:
container_id = _CONTAINER_ID_COUNTER.get_id()
style_template = Template(_CSS_STYLE)
style_with_id = style_template.substitute(id=container_id)
estimator_str = str(estimator)
# The fallback message is shown by default and loading the CSS sets
# div.sk-text-repr-fallback to display: none to hide the fallback message.
#
# If the notebook is trusted, the CSS is loaded which hides the fallback
# message. If the notebook is not trusted, then the CSS is not loaded and the
# fallback message is shown by default.
#
# The reverse logic applies to HTML repr div.sk-container.
# div.sk-container is hidden by default and loading the CSS displays it.
fallback_msg = (
"In a Jupyter environment, please rerun this cell to show the HTML"
" representation or trust the notebook. <br />On GitHub, the"
" HTML representation is unable to render, please try loading this page"
" with nbviewer.org."
)
html_template = (
f"<style>{style_with_id}</style>"
f'<div id="{container_id}" class="sk-top-container">'
'<div class="sk-text-repr-fallback">'
f"<pre>{html.escape(estimator_str)}</pre><b>{fallback_msg}</b>"
"</div>"
'<div class="sk-container" hidden>'
)
out.write(html_template)
_write_estimator_html(
out,
estimator,
estimator.__class__.__name__,
estimator_str,
first_call=True,
is_fitted_css_class=is_fitted_css_class,
is_fitted_icon=is_fitted_icon,
)
out.write("</div></div>")
html_output = out.getvalue()
return html_output
class _HTMLDocumentationLinkMixin:
"""Mixin class allowing to generate a link to the API documentation.
This mixin relies on three attributes:
- `_doc_link_module`: it corresponds to the root module (e.g. `sklearn`). Using this
mixin, the default value is `sklearn`.
- `_doc_link_template`: it corresponds to the template used to generate the
link to the API documentation. Using this mixin, the default value is
`"https://scikit-learn.org/{version_url}/modules/generated/
{estimator_module}.{estimator_name}.html"`.
- `_doc_link_url_param_generator`: it corresponds to a function that generates the
parameters to be used in the template when the estimator module and name are not
sufficient.
The method :meth:`_get_doc_link` generates the link to the API documentation for a
given estimator.
This mixin provides all the necessary state for
:func:`sklearn.utils.estimator_html_repr` to generate a link to the API
documentation for the estimator HTML diagram.
Examples
--------
If the default values for `_doc_link_module`, `_doc_link_template` are not suitable,
then you can override them:
>>> from sklearn.base import BaseEstimator
>>> estimator = BaseEstimator()
>>> estimator._doc_link_template = "https://website.com/{single_param}.html"
>>> def url_param_generator(estimator):
... return {"single_param": estimator.__class__.__name__}
>>> estimator._doc_link_url_param_generator = url_param_generator
>>> estimator._get_doc_link()
'https://website.com/BaseEstimator.html'
"""
_doc_link_module = "sklearn"
_doc_link_url_param_generator = None
@property
def _doc_link_template(self):
sklearn_version = parse_version(__version__)
if sklearn_version.dev is None:
version_url = f"{sklearn_version.major}.{sklearn_version.minor}"
else:
version_url = "dev"
return getattr(
self,
"__doc_link_template",
(
f"https://scikit-learn.org/{version_url}/modules/generated/"
"{estimator_module}.{estimator_name}.html"
),
)
@_doc_link_template.setter
def _doc_link_template(self, value):
setattr(self, "__doc_link_template", value)
def _get_doc_link(self):
"""Generates a link to the API documentation for a given estimator.
This method generates the link to the estimator's documentation page
by using the template defined by the attribute `_doc_link_template`.
Returns
-------
url : str
The URL to the API documentation for this estimator. If the estimator does
not belong to module `_doc_link_module`, the empty string (i.e. `""`) is
returned.
"""
if self.__class__.__module__.split(".")[0] != self._doc_link_module:
return ""
if self._doc_link_url_param_generator is None:
estimator_name = self.__class__.__name__
# Construct the estimator's module name, up to the first private submodule.
# This works because in scikit-learn all public estimators are exposed at
# that level, even if they actually live in a private sub-module.
estimator_module = ".".join(
itertools.takewhile(
lambda part: not part.startswith("_"),
self.__class__.__module__.split("."),
)
)
return self._doc_link_template.format(
estimator_module=estimator_module, estimator_name=estimator_name
)
return self._doc_link_template.format(
**self._doc_link_url_param_generator(self)
)
@@ -0,0 +1,18 @@
# Author: Gael Varoquaux
# License: BSD
"""
Uses C++ map containers for fast dict-like behavior with integer keys
and float values.
"""
from libcpp.map cimport map as cpp_map
from ._typedefs cimport float64_t, intp_t
###############################################################################
# An object to be used in Python
cdef class IntFloatDict:
cdef cpp_map[intp_t, float64_t] my_map
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values)
@@ -0,0 +1,137 @@
"""
Uses C++ map containers for fast dict-like behavior with integer keys
and float values.
"""
# Author: Gael Varoquaux
# License: BSD
# C++
from cython.operator cimport dereference as deref, preincrement as inc
from libcpp.utility cimport pair
from libcpp.map cimport map as cpp_map
import numpy as np
from ._typedefs cimport float64_t, intp_t
###############################################################################
# An object to be used in Python
# Lookup is faster than dict (up to 10 times), and so is full traversal
# (up to 50 times), and assignment (up to 6 times), but creation is
# slower (up to 3 times). Also, a large benefit is that memory
# consumption is reduced a lot compared to a Python dict
cdef class IntFloatDict:
def __init__(
self,
intp_t[:] keys,
float64_t[:] values,
):
cdef int i
cdef int size = values.size
# TODO: check that `keys` and `values` have the same size, then
# disable bounds checking with cython.boundscheck(False)
for i in range(size):
self.my_map[keys[i]] = values[i]
def __len__(self):
return self.my_map.size()
def __getitem__(self, int key):
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)
if it == self.my_map.end():
# The key is not in the dict
raise KeyError('%i' % key)
return deref(it).second
def __setitem__(self, int key, float value):
self.my_map[key] = value
# Cython 0.20 generates buggy code below. Commenting this out for now
# and relying on the to_arrays method
# def __iter__(self):
# cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
# cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
# while it != end:
# yield deref(it).first, deref(it).second
# inc(it)
def __iter__(self):
cdef int size = self.my_map.size()
cdef intp_t [:] keys = np.empty(size, dtype=np.intp)
cdef float64_t [:] values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
cdef int idx
cdef intp_t key
cdef float64_t value
for idx in range(size):
key = keys[idx]
value = values[idx]
yield key, value
def to_arrays(self):
"""Return the key, value representation of the IntFloatDict
object.
Returns
-------
keys : ndarray, shape (n_items, ), dtype=int
The indices of the data points
values : ndarray, shape (n_items, ), dtype=float
The values of the data points
"""
cdef int size = self.my_map.size()
keys = np.empty(size, dtype=np.intp)
values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
return keys, values
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):
# Internal version of to_arrays that takes already-initialized arrays
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
cdef int index = 0
while it != end:
keys[index] = deref(it).first
values[index] = deref(it).second
inc(it)
index += 1
def update(self, IntFloatDict other):
cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()
while it != end:
self.my_map[deref(it).first] = deref(it).second
inc(it)
def copy(self):
cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)
# The '=' operator is a copy operator for C++ maps
out_obj.my_map = self.my_map
return out_obj
def append(self, intp_t key, float64_t value):
# Construct our arguments
cdef pair[intp_t, float64_t] args
args.first = key
args.second = value
self.my_map.insert(args)
###############################################################################
# operation on dict
def argmin(IntFloatDict d):
cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()
cdef intp_t min_key = -1
cdef float64_t min_value = np.inf
while it != end:
if deref(it).second < min_value:
min_value = deref(it).second
min_key = deref(it).first
inc(it)
return min_key, min_value
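Illustrative usage from Python, assuming the compiled extension is importable as `sklearn.utils._fast_dict` (a sketch, not part of the file):
import numpy as np
from sklearn.utils._fast_dict import IntFloatDict, argmin

d = IntFloatDict(np.array([1, 5], dtype=np.intp),
                 np.array([0.5, 2.0], dtype=np.float64))
d.append(7, 0.25)
print(len(d), d[5])    # 3 2.0
print(argmin(d))       # (7, 0.25)
keys, values = d.to_arrays()   # keys sorted ascending: [1 5 7]; values: [0.5 2. 0.25]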
@@ -0,0 +1,14 @@
# Heap routines, used in various Cython implementations.
from cython cimport floating
from ._typedefs cimport intp_t
cdef int heap_push(
floating* values,
intp_t* indices,
intp_t size,
floating val,
intp_t val_idx,
) noexcept nogil
@@ -0,0 +1,85 @@
from cython cimport floating
from ._typedefs cimport intp_t
cdef inline int heap_push(
floating* values,
intp_t* indices,
intp_t size,
floating val,
intp_t val_idx,
) noexcept nogil:
"""Push a tuple (val, val_idx) onto a fixed-size max-heap.
The max-heap is represented as a Structure of Arrays where:
- values is the array containing the data to construct the heap with
- indices is the array containing the indices (meta-data) of each value
Notes
-----
Arrays are manipulated via a pointer to their first element and their size
so as to ease the processing of dynamically allocated buffers.
For instance, in pseudo-code:
values = [1.2, 0.4, 0.1],
indices = [42, 1, 5],
heap_push(
values=values,
indices=indices,
size=3,
val=0.2,
val_idx=4,
)
will modify values and indices inplace, giving at the end of the call:
values == [0.4, 0.2, 0.1]
indices == [1, 4, 5]
"""
cdef:
intp_t current_idx, left_child_idx, right_child_idx, swap_idx
# Check if val should be in heap
if val >= values[0]:
return 0
# Insert val at position zero
values[0] = val
indices[0] = val_idx
# Descend the heap, swapping values until the max heap criterion is met
current_idx = 0
while True:
left_child_idx = 2 * current_idx + 1
right_child_idx = left_child_idx + 1
if left_child_idx >= size:
break
elif right_child_idx >= size:
if values[left_child_idx] > val:
swap_idx = left_child_idx
else:
break
elif values[left_child_idx] >= values[right_child_idx]:
if val < values[left_child_idx]:
swap_idx = left_child_idx
else:
break
else:
if val < values[right_child_idx]:
swap_idx = right_child_idx
else:
break
values[current_idx] = values[swap_idx]
indices[current_idx] = indices[swap_idx]
current_idx = swap_idx
values[current_idx] = val
indices[current_idx] = val_idx
return 0
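For readers less familiar with Cython, here is a pure-Python mirror of the same fixed-size max-heap push (an illustrative sketch, not part of the file):
def heap_push_py(values, indices, val, val_idx):
    # Push (val, val_idx) onto the fixed-size max-heap stored in `values`/`indices`.
    size = len(values)
    if val >= values[0]:          # not among the `size` smallest values
        return
    values[0], indices[0] = val, val_idx
    i = 0
    while True:
        left, right = 2 * i + 1, 2 * i + 2
        if left >= size:
            break
        elif right >= size:
            swap = left if values[left] > val else None
        elif values[left] >= values[right]:
            swap = left if val < values[left] else None
        else:
            swap = right if val < values[right] else None
        if swap is None:
            break
        values[i], indices[i] = values[swap], indices[swap]
        i = swap
    values[i], indices[i] = val, val_idx

vals, idxs = [1.2, 0.4, 0.1], [42, 1, 5]
heap_push_py(vals, idxs, 0.2, 4)
print(vals, idxs)   # [0.4, 0.2, 0.1] [1, 4, 5]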
@@ -0,0 +1,635 @@
import numbers
import sys
import warnings
from collections import UserList
from itertools import compress, islice
import numpy as np
from scipy.sparse import issparse
from ._array_api import _is_numpy_namespace, get_namespace
from ._param_validation import Interval, validate_params
from .extmath import _approximate_mode
from .validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df_or_series,
_use_interchange_protocol,
check_array,
check_consistent_length,
check_random_state,
)
def _array_indexing(array, key, key_dtype, axis):
"""Index an array or scipy.sparse consistently across NumPy version."""
xp, is_array_api = get_namespace(array)
if is_array_api:
return xp.take(array, key, axis=axis)
if issparse(array) and key_dtype == "bool":
key = np.asarray(key)
if isinstance(key, tuple):
key = list(key)
return array[key, ...] if axis == 0 else array[:, key]
def _pandas_indexing(X, key, key_dtype, axis):
"""Index a pandas dataframe or a series."""
if _is_arraylike_not_scalar(key):
key = np.asarray(key)
if key_dtype == "int" and not (isinstance(key, slice) or np.isscalar(key)):
# using take() instead of iloc[] ensures the return value is a "proper"
# copy that will not raise SettingWithCopyWarning
return X.take(key, axis=axis)
else:
# check whether we should index with loc or iloc
indexer = X.iloc if key_dtype == "int" else X.loc
return indexer[:, key] if axis else indexer[key]
def _list_indexing(X, key, key_dtype):
"""Index a Python list."""
if np.isscalar(key) or isinstance(key, slice):
# key is a slice or a scalar
return X[key]
if key_dtype == "bool":
# key is a boolean array-like
return list(compress(X, key))
# key is an integer array-like
return [X[idx] for idx in key]
def _polars_indexing(X, key, key_dtype, axis):
"""Indexing X with polars interchange protocol."""
# Polars behavior is more consistent with lists
if isinstance(key, np.ndarray):
# Convert each element of the array to a Python scalar
key = key.tolist()
elif not (np.isscalar(key) or isinstance(key, slice)):
key = list(key)
if axis == 1:
# Here we are certain to have a polars DataFrame, which can be indexed with
# integer and string scalars, and lists of integers, strings and booleans
return X[:, key]
if key_dtype == "bool":
# Boolean mask can be indexed in the same way for Series and DataFrame (axis=0)
return X.filter(key)
# Integer scalar and list of integer can be indexed in the same way for Series and
# DataFrame (axis=0)
X_indexed = X[key]
if np.isscalar(key) and len(X.shape) == 2:
# `X_indexed` is a DataFrame with a single row; we return a Series to be
# consistent with pandas
pl = sys.modules["polars"]
return pl.Series(X_indexed.row(0))
return X_indexed
def _determine_key_type(key, accept_slice=True):
"""Determine the data type of key.
Parameters
----------
key : scalar, slice or array-like
The key from which we want to infer the data type.
accept_slice : bool, default=True
Whether or not to raise an error if the key is a slice.
Returns
-------
dtype : {'int', 'str', 'bool', None}
Returns the data type of key.
"""
err_msg = (
"No valid specification of the columns. Only a scalar, list or "
"slice of all integers or all strings, or boolean mask is "
"allowed"
)
dtype_to_str = {int: "int", str: "str", bool: "bool", np.bool_: "bool"}
array_dtype_to_str = {
"i": "int",
"u": "int",
"b": "bool",
"O": "str",
"U": "str",
"S": "str",
}
if key is None:
return None
if isinstance(key, tuple(dtype_to_str.keys())):
try:
return dtype_to_str[type(key)]
except KeyError:
raise ValueError(err_msg)
if isinstance(key, slice):
if not accept_slice:
raise TypeError(
"Only array-like or scalar are supported. A Python slice was given."
)
if key.start is None and key.stop is None:
return None
key_start_type = _determine_key_type(key.start)
key_stop_type = _determine_key_type(key.stop)
if key_start_type is not None and key_stop_type is not None:
if key_start_type != key_stop_type:
raise ValueError(err_msg)
if key_start_type is not None:
return key_start_type
return key_stop_type
# TODO(1.9) remove UserList when the force_int_remainder_cols param
# of ColumnTransformer is removed
if isinstance(key, (list, tuple, UserList)):
unique_key = set(key)
key_type = {_determine_key_type(elt) for elt in unique_key}
if not key_type:
return None
if len(key_type) != 1:
raise ValueError(err_msg)
return key_type.pop()
if hasattr(key, "dtype"):
xp, is_array_api = get_namespace(key)
# NumPy arrays are special-cased in their own branch because the Array API
# cannot handle object/string-based dtypes that are often used to index
# columns of dataframes by names.
if is_array_api and not _is_numpy_namespace(xp):
if xp.isdtype(key.dtype, "bool"):
return "bool"
elif xp.isdtype(key.dtype, "integral"):
return "int"
else:
raise ValueError(err_msg)
else:
try:
return array_dtype_to_str[key.dtype.kind]
except KeyError:
raise ValueError(err_msg)
raise ValueError(err_msg)
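A few illustrative calls, with the expected return values inferred from the branches above:
import numpy as np

print(_determine_key_type(3))                        # 'int'
print(_determine_key_type(["a", "b"]))               # 'str'
print(_determine_key_type(slice(0, 5)))              # 'int'
print(_determine_key_type(np.array([True, False])))  # 'bool'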
def _safe_indexing(X, indices, *, axis=0):
"""Return rows, items or columns of X using indices.
.. warning::
This utility is documented, but **private**. This means that
backward compatibility might be broken without any deprecation
cycle.
Parameters
----------
X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series
Data from which to sample rows, items or columns. `list` are only
supported when `axis=0`.
indices : bool, int, str, slice, array-like
- If `axis=0`, boolean and integer array-like, integer slice,
and scalar integer are supported.
- If `axis=1`:
- to select a single column, `indices` can be of `int` type for
all `X` types and `str` only for dataframe. The selected subset
will be 1D, unless `X` is a sparse matrix in which case it will
be 2D.
- to select multiple columns, `indices` can be one of the
following: `list`, `array`, `slice`. The type used in
these containers can be one of the following: `int`, 'bool' and
`str`. However, `str` is only supported when `X` is a dataframe.
The selected subset will be 2D.
axis : int, default=0
The axis along which `X` will be subsampled. `axis=0` will select
rows while `axis=1` will select columns.
Returns
-------
subset
Subset of X on axis 0 or 1.
Notes
-----
CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are
not supported.
Examples
--------
>>> import numpy as np
>>> from sklearn.utils import _safe_indexing
>>> data = np.array([[1, 2], [3, 4], [5, 6]])
>>> _safe_indexing(data, 0, axis=0) # select the first row
array([1, 2])
>>> _safe_indexing(data, 0, axis=1) # select the first column
array([1, 3, 5])
"""
if indices is None:
return X
if axis not in (0, 1):
raise ValueError(
"'axis' should be either 0 (to index rows) or 1 (to index "
" column). Got {} instead.".format(axis)
)
indices_dtype = _determine_key_type(indices)
if axis == 0 and indices_dtype == "str":
raise ValueError("String indexing is not supported with 'axis=0'")
if axis == 1 and isinstance(X, list):
raise ValueError("axis=1 is not supported for lists")
if axis == 1 and hasattr(X, "shape") and len(X.shape) != 2:
raise ValueError(
"'X' should be a 2D NumPy array, 2D sparse matrix or "
"dataframe when indexing the columns (i.e. 'axis=1'). "
"Got {} instead with {} dimension(s).".format(type(X), len(X.shape))
)
if (
axis == 1
and indices_dtype == "str"
and not (_is_pandas_df(X) or _use_interchange_protocol(X))
):
raise ValueError(
"Specifying the columns using strings is only supported for dataframes."
)
if hasattr(X, "iloc"):
# TODO: we should probably use _is_pandas_df_or_series(X) instead but this
# would require updating some tests such as test_train_test_split_mock_pandas.
return _pandas_indexing(X, indices, indices_dtype, axis=axis)
elif _is_polars_df_or_series(X):
return _polars_indexing(X, indices, indices_dtype, axis=axis)
elif hasattr(X, "shape"):
return _array_indexing(X, indices, indices_dtype, axis=axis)
else:
return _list_indexing(X, indices, indices_dtype)
def _safe_assign(X, values, *, row_indexer=None, column_indexer=None):
"""Safe assignment to a numpy array, sparse matrix, or pandas dataframe.
Parameters
----------
X : {ndarray, sparse-matrix, dataframe}
Array to be modified. It is expected to be 2-dimensional.
values : ndarray
The values to be assigned to `X`.
row_indexer : array-like, dtype={int, bool}, default=None
A 1-dimensional array to select the rows of interest. If `None`, all
rows are selected.
column_indexer : array-like, dtype={int, bool}, default=None
A 1-dimensional array to select the columns of interest. If `None`, all
columns are selected.
"""
row_indexer = slice(None, None, None) if row_indexer is None else row_indexer
column_indexer = (
slice(None, None, None) if column_indexer is None else column_indexer
)
if hasattr(X, "iloc"): # pandas dataframe
with warnings.catch_warnings():
# pandas >= 1.5 raises a warning when using iloc to set values in a column
# that does not have the same type as the column being set. It happens
# for instance when setting a categorical column with a string.
# In the future the behavior won't change and the warning should disappear.
# TODO(1.3): check if the warning is still raised or remove the filter.
warnings.simplefilter("ignore", FutureWarning)
X.iloc[row_indexer, column_indexer] = values
else: # numpy array or sparse matrix
X[row_indexer, column_indexer] = values
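A minimal usage sketch for the NumPy branch above (pandas and sparse inputs follow the same calling convention):
import numpy as np

X = np.zeros((3, 2))
_safe_assign(X, np.array([[1.0, 2.0]]), row_indexer=[1])
# X is now:
# [[0. 0.]
#  [1. 2.]
#  [0. 0.]]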
def _get_column_indices_for_bool_or_int(key, n_columns):
# Convert key into list of positive integer indexes
try:
idx = _safe_indexing(np.arange(n_columns), key)
except IndexError as e:
raise ValueError(
f"all features must be in [0, {n_columns - 1}] or [-{n_columns}, 0]"
) from e
return np.atleast_1d(idx).tolist()
def _get_column_indices(X, key):
"""Get feature column indices for input data X and key.
For accepted values of `key`, see the docstring of
:func:`_safe_indexing`.
"""
key_dtype = _determine_key_type(key)
if _use_interchange_protocol(X):
return _get_column_indices_interchange(X.__dataframe__(), key, key_dtype)
n_columns = X.shape[1]
if isinstance(key, (list, tuple)) and not key:
# we get an empty list
return []
elif key_dtype in ("bool", "int"):
return _get_column_indices_for_bool_or_int(key, n_columns)
else:
try:
all_columns = X.columns
except AttributeError:
raise ValueError(
"Specifying the columns using strings is only supported for dataframes."
)
if isinstance(key, str):
columns = [key]
elif isinstance(key, slice):
start, stop = key.start, key.stop
if start is not None:
start = all_columns.get_loc(start)
if stop is not None:
# pandas indexing with strings is endpoint included
stop = all_columns.get_loc(stop) + 1
else:
stop = n_columns + 1
return list(islice(range(n_columns), start, stop))
else:
columns = list(key)
try:
column_indices = []
for col in columns:
col_idx = all_columns.get_loc(col)
if not isinstance(col_idx, numbers.Integral):
raise ValueError(
f"Selected columns, {columns}, are not unique in dataframe"
)
column_indices.append(col_idx)
except KeyError as e:
raise ValueError("A given column is not a column of the dataframe") from e
return column_indices
def _get_column_indices_interchange(X_interchange, key, key_dtype):
"""Same as _get_column_indices but for X with __dataframe__ protocol."""
n_columns = X_interchange.num_columns()
if isinstance(key, (list, tuple)) and not key:
# we get an empty list
return []
elif key_dtype in ("bool", "int"):
return _get_column_indices_for_bool_or_int(key, n_columns)
else:
column_names = list(X_interchange.column_names())
if isinstance(key, slice):
if key.step not in [1, None]:
raise NotImplementedError("key.step must be 1 or None")
start, stop = key.start, key.stop
if start is not None:
start = column_names.index(start)
if stop is not None:
stop = column_names.index(stop) + 1
else:
stop = n_columns + 1
return list(islice(range(n_columns), start, stop))
selected_columns = [key] if np.isscalar(key) else key
try:
return [column_names.index(col) for col in selected_columns]
except ValueError as e:
raise ValueError("A given column is not a column of the dataframe") from e
@validate_params(
{
"replace": ["boolean"],
"n_samples": [Interval(numbers.Integral, 1, None, closed="left"), None],
"random_state": ["random_state"],
"stratify": ["array-like", "sparse matrix", None],
},
prefer_skip_nested_validation=True,
)
def resample(*arrays, replace=True, n_samples=None, random_state=None, stratify=None):
"""Resample arrays or sparse matrices in a consistent way.
The default strategy implements one step of the bootstrapping
procedure.
Parameters
----------
*arrays : sequence of array-like of shape (n_samples,) or \
(n_samples, n_outputs)
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
replace : bool, default=True
Implements resampling with replacement. If False, this will implement
(sliced) random permutations.
n_samples : int, default=None
Number of samples to generate. If left to None this is
automatically set to the first dimension of the arrays.
If replace is False it should not be larger than the length of
arrays.
random_state : int, RandomState instance or None, default=None
Determines random number generation for shuffling
the data.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
stratify : {array-like, sparse matrix} of shape (n_samples,) or \
(n_samples, n_outputs), default=None
If not None, data is split in a stratified fashion, using this as
the class labels.
Returns
-------
resampled_arrays : sequence of array-like of shape (n_samples,) or \
(n_samples, n_outputs)
Sequence of resampled copies of the collections. The original arrays
are not impacted.
See Also
--------
shuffle : Shuffle arrays or sparse matrices in a consistent way.
Examples
--------
It is possible to mix sparse and dense arrays in the same run::
>>> import numpy as np
>>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
>>> y = np.array([0, 1, 2])
>>> from scipy.sparse import coo_matrix
>>> X_sparse = coo_matrix(X)
>>> from sklearn.utils import resample
>>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)
>>> X
array([[1., 0.],
[2., 1.],
[1., 0.]])
>>> X_sparse
<3x2 sparse matrix of type '<... 'numpy.float64'>'
with 4 stored elements in Compressed Sparse Row format>
>>> X_sparse.toarray()
array([[1., 0.],
[2., 1.],
[1., 0.]])
>>> y
array([0, 1, 0])
>>> resample(y, n_samples=2, random_state=0)
array([0, 1])
Example using stratification::
>>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]
>>> resample(y, n_samples=5, replace=False, stratify=y,
... random_state=0)
[1, 1, 1, 0, 1]
"""
max_n_samples = n_samples
random_state = check_random_state(random_state)
if len(arrays) == 0:
return None
first = arrays[0]
n_samples = first.shape[0] if hasattr(first, "shape") else len(first)
if max_n_samples is None:
max_n_samples = n_samples
elif (max_n_samples > n_samples) and (not replace):
raise ValueError(
"Cannot sample %d out of arrays with dim %d when replace is False"
% (max_n_samples, n_samples)
)
check_consistent_length(*arrays)
if stratify is None:
if replace:
indices = random_state.randint(0, n_samples, size=(max_n_samples,))
else:
indices = np.arange(n_samples)
random_state.shuffle(indices)
indices = indices[:max_n_samples]
else:
# Code adapted from StratifiedShuffleSplit()
y = check_array(stratify, ensure_2d=False, dtype=None)
if y.ndim == 2:
# for multi-label y, map each distinct row to a string repr
# using join because str(row) uses an ellipsis if len(row) > 1000
y = np.array([" ".join(row.astype("str")) for row in y])
classes, y_indices = np.unique(y, return_inverse=True)
n_classes = classes.shape[0]
class_counts = np.bincount(y_indices)
# Find the sorted list of instances for each class:
# (np.unique above performs a sort, so code is O(n logn) already)
class_indices = np.split(
np.argsort(y_indices, kind="mergesort"), np.cumsum(class_counts)[:-1]
)
n_i = _approximate_mode(class_counts, max_n_samples, random_state)
indices = []
for i in range(n_classes):
indices_i = random_state.choice(class_indices[i], n_i[i], replace=replace)
indices.extend(indices_i)
indices = random_state.permutation(indices)
# convert sparse matrices to CSR for row-based indexing
arrays = [a.tocsr() if issparse(a) else a for a in arrays]
resampled_arrays = [_safe_indexing(a, indices) for a in arrays]
if len(resampled_arrays) == 1:
# syntactic sugar for the unit argument case
return resampled_arrays[0]
else:
return resampled_arrays
def shuffle(*arrays, random_state=None, n_samples=None):
"""Shuffle arrays or sparse matrices in a consistent way.
This is a convenience alias to ``resample(*arrays, replace=False)`` to do
random permutations of the collections.
Parameters
----------
*arrays : sequence of indexable data-structures
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
random_state : int, RandomState instance or None, default=None
Determines random number generation for shuffling
the data.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
n_samples : int, default=None
Number of samples to generate. If left to None this is
automatically set to the first dimension of the arrays. It should
not be larger than the length of arrays.
Returns
-------
shuffled_arrays : sequence of indexable data-structures
Sequence of shuffled copies of the collections. The original arrays
are not impacted.
See Also
--------
resample : Resample arrays or sparse matrices in a consistent way.
Examples
--------
It is possible to mix sparse and dense arrays in the same run::
>>> import numpy as np
>>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
>>> y = np.array([0, 1, 2])
>>> from scipy.sparse import coo_matrix
>>> X_sparse = coo_matrix(X)
>>> from sklearn.utils import shuffle
>>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)
>>> X
array([[0., 0.],
[2., 1.],
[1., 0.]])
>>> X_sparse
<3x2 sparse matrix of type '<... 'numpy.float64'>'
with 3 stored elements in Compressed Sparse Row format>
>>> X_sparse.toarray()
array([[0., 0.],
[2., 1.],
[1., 0.]])
>>> y
array([2, 1, 0])
>>> shuffle(y, n_samples=2, random_state=0)
array([0, 1])
"""
return resample(
*arrays, replace=False, n_samples=n_samples, random_state=random_state
)
@@ -0,0 +1,50 @@
# Author: John Kirkham, Meekail Zain, Thomas Fan
from libc.math cimport isnan, isinf
from cython cimport floating
cpdef enum FiniteStatus:
all_finite = 0
has_nan = 1
has_infinite = 2
def cy_isfinite(floating[::1] a, bint allow_nan=False):
cdef FiniteStatus result
with nogil:
result = _isfinite(a, allow_nan)
return result
cdef inline FiniteStatus _isfinite(floating[::1] a, bint allow_nan) noexcept nogil:
cdef floating* a_ptr = &a[0]
cdef Py_ssize_t length = len(a)
if allow_nan:
return _isfinite_allow_nan(a_ptr, length)
else:
return _isfinite_disable_nan(a_ptr, length)
cdef inline FiniteStatus _isfinite_allow_nan(floating* a_ptr,
Py_ssize_t length) noexcept nogil:
cdef Py_ssize_t i
cdef floating v
for i in range(length):
v = a_ptr[i]
if isinf(v):
return FiniteStatus.has_infinite
return FiniteStatus.all_finite
cdef inline FiniteStatus _isfinite_disable_nan(floating* a_ptr,
Py_ssize_t length) noexcept nogil:
cdef Py_ssize_t i
cdef floating v
for i in range(length):
v = a_ptr[i]
if isnan(v):
return FiniteStatus.has_nan
elif isinf(v):
return FiniteStatus.has_infinite
return FiniteStatus.all_finite
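A NumPy-level sketch of the same status check (hypothetical helper, not the Cython API; the vectorised checks give NaN precedence over inf, whereas the loop above reports whichever offending value it meets first):
import numpy as np

def finite_status(a, allow_nan=False):
    # 0: all_finite, 1: has_nan, 2: has_infinite (mirrors FiniteStatus)
    if not allow_nan and np.isnan(a).any():
        return 1
    if np.isinf(a).any():
        return 2
    return 0

print(finite_status(np.array([0.0, 1.0])))                      # 0
print(finite_status(np.array([0.0, np.nan])))                   # 1
print(finite_status(np.array([0.0, np.inf]), allow_nan=True))   # 2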
@@ -0,0 +1,40 @@
# TODO(1.7): remove this file
import warnings as _warnings
with _warnings.catch_warnings():
_warnings.simplefilter("ignore")
# joblib imports may raise DeprecationWarning on certain Python
# versions
import joblib
from joblib import (
Memory,
Parallel,
__version__,
cpu_count,
delayed,
dump,
effective_n_jobs,
hash,
load,
logger,
parallel_backend,
register_parallel_backend,
)
__all__ = [
"parallel_backend",
"register_parallel_backend",
"cpu_count",
"Parallel",
"Memory",
"delayed",
"effective_n_jobs",
"hash",
"logger",
"dump",
"load",
"joblib",
"__version__",
]
@@ -0,0 +1,178 @@
from contextlib import suppress
import numpy as np
from scipy import sparse as sp
from ._missing import is_scalar_nan
from ._param_validation import validate_params
from .fixes import _object_dtype_isnan
def _get_dense_mask(X, value_to_mask):
with suppress(ImportError, AttributeError):
# We also suppress `AttributeError` because older versions of pandas do
# not have `NA`.
import pandas
if value_to_mask is pandas.NA:
return pandas.isna(X)
if is_scalar_nan(value_to_mask):
if X.dtype.kind == "f":
Xt = np.isnan(X)
elif X.dtype.kind in ("i", "u"):
# can't have NaNs in integer array.
Xt = np.zeros(X.shape, dtype=bool)
else:
# np.isnan does not work on object dtypes.
Xt = _object_dtype_isnan(X)
else:
Xt = X == value_to_mask
return Xt
def _get_mask(X, value_to_mask):
"""Compute the boolean mask X == value_to_mask.
Parameters
----------
X : {ndarray, sparse matrix} of shape (n_samples, n_features)
Input data, where ``n_samples`` is the number of samples and
``n_features`` is the number of features.
value_to_mask : {int, float}
The value which is to be masked in X.
Returns
-------
X_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)
Missing mask.
"""
if not sp.issparse(X):
# For all cases apart of a sparse input where we need to reconstruct
# a sparse output
return _get_dense_mask(X, value_to_mask)
Xt = _get_dense_mask(X.data, value_to_mask)
sparse_constructor = sp.csr_matrix if X.format == "csr" else sp.csc_matrix
Xt_sparse = sparse_constructor(
(Xt, X.indices.copy(), X.indptr.copy()), shape=X.shape, dtype=bool
)
return Xt_sparse
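A quick illustration of the dense path (assuming NumPy is available):
import numpy as np

X = np.array([[1.0, np.nan], [3.0, -1.0]])
print(_get_mask(X, np.nan))   # [[False  True] [False False]]
print(_get_mask(X, -1.0))     # [[False False] [False  True]]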
@validate_params(
{
"X": ["array-like", "sparse matrix"],
"mask": ["array-like"],
},
prefer_skip_nested_validation=True,
)
def safe_mask(X, mask):
"""Return a mask which is safe to use on X.
Parameters
----------
X : {array-like, sparse matrix}
Data on which to apply mask.
mask : array-like
Mask to be used on X.
Returns
-------
mask : ndarray
Array that is safe to use on X.
Examples
--------
>>> from sklearn.utils import safe_mask
>>> from scipy.sparse import csr_matrix
>>> data = csr_matrix([[1], [2], [3], [4], [5]])
>>> condition = [False, True, True, False, True]
>>> mask = safe_mask(data, condition)
>>> data[mask].toarray()
array([[2],
[3],
[5]])
"""
mask = np.asarray(mask)
if np.issubdtype(mask.dtype, np.signedinteger):
return mask
if hasattr(X, "toarray"):
ind = np.arange(mask.shape[0])
mask = ind[mask]
return mask
def axis0_safe_slice(X, mask, len_mask):
"""Return a mask which is safer to use on X than safe_mask.
This mask is safer than safe_mask since it returns an
empty array when a sparse matrix is sliced with an all-False
boolean mask, instead of raising an unhelpful error in older
versions of SciPy.
See: https://github.com/scipy/scipy/issues/5361
Also note that we can avoid doing the dot product by checking if
the len_mask is not zero in _huber_loss_and_gradient but this
is not going to be the bottleneck, since the number of outliers
and non_outliers are typically non-zero and it makes the code
tougher to follow.
Parameters
----------
X : {array-like, sparse matrix}
Data on which to apply mask.
mask : ndarray
Mask to be used on X.
len_mask : int
The length of the mask.
Returns
-------
mask : ndarray
Array that is safe to use on X.
"""
if len_mask != 0:
return X[safe_mask(X, mask), :]
return np.zeros(shape=(0, X.shape[1]))
def indices_to_mask(indices, mask_length):
"""Convert list of indices to boolean mask.
Parameters
----------
indices : list-like
List of integers treated as indices.
mask_length : int
Length of boolean mask to be generated.
This parameter must be greater than max(indices).
Returns
-------
mask : 1d boolean nd-array
Boolean array that is True where indices are present, else False.
Examples
--------
>>> from sklearn.utils._mask import indices_to_mask
>>> indices = [1, 2, 3, 4]
>>> indices_to_mask(indices, 5)
array([False, True, True, True, True])
"""
if mask_length <= np.max(indices):
raise ValueError("mask_length must be greater than max(indices)")
mask = np.zeros(mask_length, dtype=bool)
mask[indices] = True
return mask
File diff suppressed because it is too large
@@ -0,0 +1,65 @@
import math
import numbers
from contextlib import suppress
def is_scalar_nan(x):
"""Test if x is NaN.
This function is meant to overcome the issue that np.isnan does not allow
non-numerical types as input, and that np.nan is not float('nan').
Parameters
----------
x : any type
Any scalar value.
Returns
-------
bool
Returns true if x is NaN, and false otherwise.
Examples
--------
>>> import numpy as np
>>> from sklearn.utils._missing import is_scalar_nan
>>> is_scalar_nan(np.nan)
True
>>> is_scalar_nan(float("nan"))
True
>>> is_scalar_nan(None)
False
>>> is_scalar_nan("")
False
>>> is_scalar_nan([np.nan])
False
"""
return (
not isinstance(x, numbers.Integral)
and isinstance(x, numbers.Real)
and math.isnan(x)
)
def is_pandas_na(x):
"""Test if x is pandas.NA.
We intentionally do not use this function to return `True` for `pd.NA` in
`is_scalar_nan`, because estimators that support `pd.NA` are the exception
rather than the rule at the moment. When `pd.NA` is more universally
supported, we may reconsider this decision.
Parameters
----------
x : any type
Returns
-------
boolean
"""
with suppress(ImportError):
from pandas import NA
return x is NA
return False
@@ -0,0 +1,410 @@
import numpy as np
from ..base import BaseEstimator, ClassifierMixin
from ..utils._metadata_requests import RequestMethod
from .metaestimators import available_if
from .validation import (
_check_sample_weight,
_num_samples,
check_array,
check_is_fitted,
check_random_state,
)
class ArraySlicingWrapper:
"""
Parameters
----------
array
"""
def __init__(self, array):
self.array = array
def __getitem__(self, aslice):
return MockDataFrame(self.array[aslice])
class MockDataFrame:
"""
Parameters
----------
array
"""
# have shape and length but don't support indexing.
def __init__(self, array):
self.array = array
self.values = array
self.shape = array.shape
self.ndim = array.ndim
# ugly hack to make iloc work.
self.iloc = ArraySlicingWrapper(array)
def __len__(self):
return len(self.array)
def __array__(self, dtype=None):
# Pandas data frames also are array-like: we want to make sure that
# input validation in cross-validation does not try to call that
# method.
return self.array
def __eq__(self, other):
return MockDataFrame(self.array == other.array)
def __ne__(self, other):
return not self == other
def take(self, indices, axis=0):
return MockDataFrame(self.array.take(indices, axis=axis))
class CheckingClassifier(ClassifierMixin, BaseEstimator):
"""Dummy classifier to test pipelining and meta-estimators.
Checks some property of `X` and `y` in fit / predict.
This allows testing whether pipelines / cross-validation or metaestimators
changed the input.
Can also be used to check if `fit_params` are passed correctly, and
to force a certain score to be returned.
Parameters
----------
check_y, check_X : callable, default=None
The callables used to validate `X` and `y`. These callables should return
a bool where `False` will trigger an `AssertionError`. If `None`, the
data is not validated. Default is `None`.
check_y_params, check_X_params : dict, default=None
The optional parameters to pass to `check_X` and `check_y`. If `None`,
then no parameters are passed in.
methods_to_check : "all" or list of str, default="all"
The methods in which the checks should be applied. By default,
all checks will be done on all methods (`fit`, `predict`,
`predict_proba`, `decision_function` and `score`).
foo_param : int, default=0
A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1
otherwise it is 0.
expected_sample_weight : bool, default=False
Whether to check if a valid `sample_weight` was passed to `fit`.
expected_fit_params : list of str, default=None
A list of the expected parameters given when calling `fit`.
Attributes
----------
classes_ : int
The classes seen during `fit`.
n_features_in_ : int
The number of features seen during `fit`.
Examples
--------
>>> from sklearn.utils._mocking import CheckingClassifier
This helper allows asserting specific properties of `X` or `y`. In this
case we expect `check_X` or `check_y` to return a boolean.
>>> from sklearn.datasets import load_iris
>>> X, y = load_iris(return_X_y=True)
>>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))
>>> clf.fit(X, y)
CheckingClassifier(...)
We can also provide a check which might raise an error. In this case, we
expect `check_X` to return `X` and `check_y` to return `y`.
>>> from sklearn.utils import check_array
>>> clf = CheckingClassifier(check_X=check_array)
>>> clf.fit(X, y)
CheckingClassifier(...)
"""
def __init__(
self,
*,
check_y=None,
check_y_params=None,
check_X=None,
check_X_params=None,
methods_to_check="all",
foo_param=0,
expected_sample_weight=None,
expected_fit_params=None,
random_state=None,
):
self.check_y = check_y
self.check_y_params = check_y_params
self.check_X = check_X
self.check_X_params = check_X_params
self.methods_to_check = methods_to_check
self.foo_param = foo_param
self.expected_sample_weight = expected_sample_weight
self.expected_fit_params = expected_fit_params
self.random_state = random_state
def _check_X_y(self, X, y=None, should_be_fitted=True):
"""Validate X and y and make extra check.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The data set.
`X` is checked only if `check_X` is not `None` (default is None).
y : array-like of shape (n_samples), default=None
The corresponding target, by default `None`.
`y` is checked only if `check_y` is not `None` (default is None).
should_be_fitted : bool, default=True
Whether or not the classifier should be already fitted.
By default True.
Returns
-------
X, y
"""
if should_be_fitted:
check_is_fitted(self)
if self.check_X is not None:
params = {} if self.check_X_params is None else self.check_X_params
checked_X = self.check_X(X, **params)
if isinstance(checked_X, (bool, np.bool_)):
assert checked_X
else:
X = checked_X
if y is not None and self.check_y is not None:
params = {} if self.check_y_params is None else self.check_y_params
checked_y = self.check_y(y, **params)
if isinstance(checked_y, (bool, np.bool_)):
assert checked_y
else:
y = checked_y
return X, y
def fit(self, X, y, sample_weight=None, **fit_params):
"""Fit classifier.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the number of features.
y : array-like of shape (n_samples, n_outputs) or (n_samples,), \
default=None
Target relative to X for classification or regression;
None for unsupervised learning.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
**fit_params : dict of string -> object
Parameters passed to the ``fit`` method of the estimator.
Returns
-------
self
"""
assert _num_samples(X) == _num_samples(y)
if self.methods_to_check == "all" or "fit" in self.methods_to_check:
X, y = self._check_X_y(X, y, should_be_fitted=False)
self.n_features_in_ = np.shape(X)[1]
self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True))
if self.expected_fit_params:
missing = set(self.expected_fit_params) - set(fit_params)
if missing:
raise AssertionError(
f"Expected fit parameter(s) {list(missing)} not seen."
)
for key, value in fit_params.items():
if _num_samples(value) != _num_samples(X):
raise AssertionError(
f"Fit parameter {key} has length {_num_samples(value)}"
f"; expected {_num_samples(X)}."
)
if self.expected_sample_weight:
if sample_weight is None:
raise AssertionError("Expected sample_weight to be passed")
_check_sample_weight(sample_weight, X)
return self
def predict(self, X):
"""Predict the first class seen in `classes_`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
Returns
-------
preds : ndarray of shape (n_samples,)
Predictions, each being one of the classes seen in `classes_`.
"""
if self.methods_to_check == "all" or "predict" in self.methods_to_check:
X, y = self._check_X_y(X)
rng = check_random_state(self.random_state)
return rng.choice(self.classes_, size=_num_samples(X))
def predict_proba(self, X):
"""Predict probabilities for each class.
Here, the dummy classifier provides random probabilities that are
normalized to sum to 1 for each sample.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
Returns
-------
proba : ndarray of shape (n_samples, n_classes)
The probabilities for each sample and class.
"""
if self.methods_to_check == "all" or "predict_proba" in self.methods_to_check:
X, y = self._check_X_y(X)
rng = check_random_state(self.random_state)
proba = rng.randn(_num_samples(X), len(self.classes_))
proba = np.abs(proba, out=proba)
proba /= np.sum(proba, axis=1)[:, np.newaxis]
return proba
def decision_function(self, X):
"""Confidence score.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
Returns
-------
decision : ndarray of shape (n_samples,) if n_classes == 2\
else (n_samples, n_classes)
Confidence score.
"""
if (
self.methods_to_check == "all"
or "decision_function" in self.methods_to_check
):
X, y = self._check_X_y(X)
rng = check_random_state(self.random_state)
if len(self.classes_) == 2:
# for a binary classifier, the confidence score is relative to
# classes_[1], so a single score per sample is returned.
return rng.randn(_num_samples(X))
else:
return rng.randn(_num_samples(X), len(self.classes_))
def score(self, X=None, Y=None):
"""Fake score.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data, where `n_samples` is the number of samples and
`n_features` is the number of features.
Y : array-like of shape (n_samples, n_output) or (n_samples,)
Target relative to X for classification or regression;
None for unsupervised learning.
Returns
-------
score : float
Either 0 or 1 depending on `foo_param` (i.e. `score=1` if `foo_param > 1`,
otherwise `score=0`).
"""
if self.methods_to_check == "all" or "score" in self.methods_to_check:
self._check_X_y(X, Y)
if self.foo_param > 1:
score = 1.0
else:
score = 0.0
return score
def _more_tags(self):
return {"_skip_test": True, "X_types": ["1dlabel"]}
# Deactivate key validation for CheckingClassifier because we want to be able to
# call fit with arbitrary fit_params and record them. Without this change, we
# would get an error because those arbitrary params are not expected.
CheckingClassifier.set_fit_request = RequestMethod( # type: ignore
name="fit", keys=[], validate_keys=False
)
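# Illustrative sketch (not part of the module): checking that a fit parameter is
# forwarded to `fit`. The parameter name `sample_prop` is hypothetical.
#
#     import numpy as np
#     X, y = np.zeros((10, 2)), np.zeros(10)
#     clf = CheckingClassifier(expected_fit_params=["sample_prop"])
#     clf.fit(X, y, sample_prop=np.ones(10))   # passes
#     clf.fit(X, y)                            # AssertionError: parameter not seen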
class NoSampleWeightWrapper(BaseEstimator):
"""Wrap estimator which will not expose `sample_weight`.
Parameters
----------
est : estimator, default=None
The estimator to wrap.
"""
def __init__(self, est=None):
self.est = est
def fit(self, X, y):
return self.est.fit(X, y)
def predict(self, X):
return self.est.predict(X)
def predict_proba(self, X):
return self.est.predict_proba(X)
def _more_tags(self):
return {"_skip_test": True}
def _check_response(method):
def check(self):
return self.response_methods is not None and method in self.response_methods
return check
class _MockEstimatorOnOffPrediction(BaseEstimator):
"""Estimator for which we can turn on/off the prediction methods.
Parameters
----------
response_methods : list of \
{"predict", "predict_proba", "decision_function"}, default=None
List containing the response methods implemented by the estimator. When a
response method is in the list, calling it returns the name of that method.
Otherwise, an `AttributeError` is raised, which allows using `getattr` as
with any conventional estimator. By default, no response methods are
mocked.
"""
def __init__(self, response_methods=None):
self.response_methods = response_methods
def fit(self, X, y):
self.classes_ = np.unique(y)
return self
@available_if(_check_response("predict"))
def predict(self, X):
return "predict"
@available_if(_check_response("predict_proba"))
def predict_proba(self, X):
return "predict_proba"
@available_if(_check_response("decision_function"))
def decision_function(self, X):
return "decision_function"
@@ -0,0 +1,33 @@
# Helpers to safely access OpenMP routines
#
# no-op implementations are provided for the case where OpenMP is not available.
#
# All calls to OpenMP routines should be cimported from this module.
cdef extern from *:
"""
#ifdef _OPENMP
#include <omp.h>
#define SKLEARN_OPENMP_PARALLELISM_ENABLED 1
#else
#define SKLEARN_OPENMP_PARALLELISM_ENABLED 0
#define omp_lock_t int
#define omp_init_lock(l) (void)0
#define omp_destroy_lock(l) (void)0
#define omp_set_lock(l) (void)0
#define omp_unset_lock(l) (void)0
#define omp_get_thread_num() 0
#define omp_get_max_threads() 1
#endif
"""
bint SKLEARN_OPENMP_PARALLELISM_ENABLED
ctypedef struct omp_lock_t:
pass
void omp_init_lock(omp_lock_t*) noexcept nogil
void omp_destroy_lock(omp_lock_t*) noexcept nogil
void omp_set_lock(omp_lock_t*) noexcept nogil
void omp_unset_lock(omp_lock_t*) noexcept nogil
int omp_get_thread_num() noexcept nogil
int omp_get_max_threads() noexcept nogil
@@ -0,0 +1,77 @@
import os
from joblib import cpu_count
# Module level cache for cpu_count as we do not expect this to change during
# the lifecycle of a Python program. This dictionary is keyed by
# only_physical_cores.
_CPU_COUNTS = {}
def _openmp_parallelism_enabled():
"""Determines whether scikit-learn has been built with OpenMP
It allows to retrieve at runtime the information gathered at compile time.
"""
# SKLEARN_OPENMP_PARALLELISM_ENABLED is resolved at compile time and defined
# in _openmp_helpers.pxd as a boolean. This function exposes it to Python.
return SKLEARN_OPENMP_PARALLELISM_ENABLED
cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True):
"""Determine the effective number of threads to be used for OpenMP calls
- For ``n_threads = None``,
- if the ``OMP_NUM_THREADS`` environment variable is set, return
``openmp.omp_get_max_threads()``
- otherwise, return the minimum between ``openmp.omp_get_max_threads()``
and the number of cpus, taking cgroups quotas into account. Cgroups
quotas can typically be set by tools such as Docker.
The result of ``omp_get_max_threads`` can be influenced by environment
variable ``OMP_NUM_THREADS`` or at runtime by ``omp_set_num_threads``.
- For ``n_threads > 0``, return this as the maximal number of threads for
parallel OpenMP calls.
- For ``n_threads < 0``, return the maximal number of threads minus
``|n_threads + 1|``. In particular ``n_threads = -1`` will use as many
threads as there are available cores on the machine.
- Raise a ValueError for ``n_threads = 0``.
Passing the `only_physical_cores=False` flag makes it possible to use extra
threads for SMT/HyperThreading logical cores. It has been empirically
observed that using as many threads as available SMT cores can slightly
improve the performance in some cases, but can severely degrade
performance at other times. Therefore it is recommended to use
`only_physical_cores=True` unless an empirical study has been conducted to
assess the impact of SMT on a case-by-case basis (using various input data
shapes, in particular small data shapes).
If scikit-learn is built without OpenMP support, always return 1.
"""
if n_threads == 0:
raise ValueError("n_threads = 0 is invalid")
if not SKLEARN_OPENMP_PARALLELISM_ENABLED:
# OpenMP disabled at build-time => sequential mode
return 1
if os.getenv("OMP_NUM_THREADS"):
# Fall back to user provided number of threads making it possible
# to exceed the number of cpus.
max_n_threads = omp_get_max_threads()
else:
try:
n_cpus = _CPU_COUNTS[only_physical_cores]
except KeyError:
n_cpus = cpu_count(only_physical_cores=only_physical_cores)
_CPU_COUNTS[only_physical_cores] = n_cpus
max_n_threads = min(omp_get_max_threads(), n_cpus)
if n_threads is None:
return max_n_threads
elif n_threads < 0:
return max(1, max_n_threads + n_threads + 1)
return n_threads
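# Illustrative sketch (not part of the module) of the semantics described in the
# docstring, assuming an OpenMP build where the effective maximum is 4 threads:
#
#     _openmp_effective_n_threads()      # -> 4 (use all available threads)
#     _openmp_effective_n_threads(2)     # -> 2 (explicit positive request)
#     _openmp_effective_n_threads(-1)    # -> 4 (max_n_threads + (-1) + 1)
#     _openmp_effective_n_threads(-2)    # -> 3
#     _openmp_effective_n_threads(0)     # raises ValueError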
@@ -0,0 +1,42 @@
def check_matplotlib_support(caller_name):
"""Raise ImportError with detailed error message if mpl is not installed.
Plot utilities like any of the Display's plotting functions should lazily import
matplotlib and call this helper before any computation.
Parameters
----------
caller_name : str
The name of the caller that requires matplotlib.
"""
try:
import matplotlib # noqa
except ImportError as e:
raise ImportError(
"{} requires matplotlib. You can install matplotlib with "
"`pip install matplotlib`".format(caller_name)
) from e
def check_pandas_support(caller_name):
"""Raise ImportError with detailed error message if pandas is not installed.
Utilities like :func:`fetch_openml` should lazily import
pandas and call this helper before any computation.
Parameters
----------
caller_name : str
The name of the caller that requires pandas.
Returns
-------
pandas
The pandas package.
"""
try:
import pandas # noqa
return pandas
except ImportError as e:
raise ImportError("{} requires pandas.".format(caller_name)) from e
@@ -0,0 +1,905 @@
import functools
import math
import operator
import re
from abc import ABC, abstractmethod
from collections.abc import Iterable
from inspect import signature
from numbers import Integral, Real
import numpy as np
from scipy.sparse import csr_matrix, issparse
from .._config import config_context, get_config
from .validation import _is_arraylike_not_scalar
class InvalidParameterError(ValueError, TypeError):
"""Custom exception to be raised when the parameter of a class/method/function
does not have a valid type or value.
"""
# Inherits from ValueError and TypeError to keep backward compatibility.
def validate_parameter_constraints(parameter_constraints, params, caller_name):
"""Validate types and values of given parameters.
Parameters
----------
parameter_constraints : dict or {"no_validation"}
If "no_validation", validation is skipped for this parameter.
If a dict, it must be a dictionary `param_name: list of constraints`.
A parameter is valid if it satisfies one of the constraints from the list.
Constraints can be:
- an Interval object, representing a continuous or discrete range of numbers
- the string "array-like"
- the string "sparse matrix"
- the string "random_state"
- callable
- None, meaning that None is a valid value for the parameter
- any type, meaning that any instance of this type is valid
- an Options object, representing a set of elements of a given type
- a StrOptions object, representing a set of strings
- the string "boolean"
- the string "verbose"
- the string "cv_object"
- the string "nan"
- a MissingValues object representing markers for missing values
- a HasMethods object, representing method(s) an object must have
- a Hidden object, representing a constraint not meant to be exposed to the user
params : dict
A dictionary `param_name: param_value`. The parameters to validate against the
constraints.
caller_name : str
The name of the estimator or function or method that called this function.
"""
for param_name, param_val in params.items():
# We allow parameters to not have a constraint so that third party estimators
# can inherit from sklearn estimators without having to necessarily use the
# validation tools.
if param_name not in parameter_constraints:
continue
constraints = parameter_constraints[param_name]
if constraints == "no_validation":
continue
constraints = [make_constraint(constraint) for constraint in constraints]
for constraint in constraints:
if constraint.is_satisfied_by(param_val):
# this constraint is satisfied, no need to check further.
break
else:
# No constraint is satisfied, raise with an informative message.
# Ignore constraints that we don't want to expose in the error message,
# i.e. options that are for internal purpose or not officially supported.
constraints = [
constraint for constraint in constraints if not constraint.hidden
]
if len(constraints) == 1:
constraints_str = f"{constraints[0]}"
else:
constraints_str = (
f"{', '.join([str(c) for c in constraints[:-1]])} or"
f" {constraints[-1]}"
)
raise InvalidParameterError(
f"The {param_name!r} parameter of {caller_name} must be"
f" {constraints_str}. Got {param_val!r} instead."
)
def make_constraint(constraint):
"""Convert the constraint into the appropriate Constraint object.
Parameters
----------
constraint : object
The constraint to convert.
Returns
-------
constraint : instance of _Constraint
The converted constraint.
"""
if isinstance(constraint, str) and constraint == "array-like":
return _ArrayLikes()
if isinstance(constraint, str) and constraint == "sparse matrix":
return _SparseMatrices()
if isinstance(constraint, str) and constraint == "random_state":
return _RandomStates()
if constraint is callable:
return _Callables()
if constraint is None:
return _NoneConstraint()
if isinstance(constraint, type):
return _InstancesOf(constraint)
if isinstance(
constraint, (Interval, StrOptions, Options, HasMethods, MissingValues)
):
return constraint
if isinstance(constraint, str) and constraint == "boolean":
return _Booleans()
if isinstance(constraint, str) and constraint == "verbose":
return _VerboseHelper()
if isinstance(constraint, str) and constraint == "cv_object":
return _CVObjects()
if isinstance(constraint, Hidden):
constraint = make_constraint(constraint.constraint)
constraint.hidden = True
return constraint
if isinstance(constraint, str) and constraint == "nan":
return _NanConstraint()
raise ValueError(f"Unknown constraint type: {constraint}")
def validate_params(parameter_constraints, *, prefer_skip_nested_validation):
"""Decorator to validate types and values of functions and methods.
Parameters
----------
parameter_constraints : dict
A dictionary `param_name: list of constraints`. See the docstring of
`validate_parameter_constraints` for a description of the accepted constraints.
Note that the *args and **kwargs parameters are not validated and must not be
present in the parameter_constraints dictionary.
prefer_skip_nested_validation : bool
If True, the validation of parameters of inner estimators or functions
called by the decorated function will be skipped.
This is useful to avoid repeatedly validating the parameters passed by the
user through the public facing API. It is also useful to avoid validating
parameters that we pass internally to inner functions and that are
guaranteed to be valid by the test suite.
It should be set to True for most functions, except for those that receive
non-validated objects as parameters or that are just wrappers around classes
because they only perform a partial validation.
Returns
-------
decorated_function : function or method
The decorated function.
"""
def decorator(func):
# The dict of parameter constraints is set as an attribute of the function
# to make it possible to dynamically introspect the constraints for
# automatic testing.
setattr(func, "_skl_parameter_constraints", parameter_constraints)
@functools.wraps(func)
def wrapper(*args, **kwargs):
global_skip_validation = get_config()["skip_parameter_validation"]
if global_skip_validation:
return func(*args, **kwargs)
func_sig = signature(func)
# Map *args/**kwargs to the function signature
params = func_sig.bind(*args, **kwargs)
params.apply_defaults()
# ignore self/cls and positional/keyword markers
to_ignore = [
p.name
for p in func_sig.parameters.values()
if p.kind in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
]
to_ignore += ["self", "cls"]
params = {k: v for k, v in params.arguments.items() if k not in to_ignore}
validate_parameter_constraints(
parameter_constraints, params, caller_name=func.__qualname__
)
try:
with config_context(
skip_parameter_validation=(
prefer_skip_nested_validation or global_skip_validation
)
):
return func(*args, **kwargs)
except InvalidParameterError as e:
# When the function is just a wrapper around an estimator, we allow
# the function to delegate validation to the estimator, but we replace
# the name of the estimator by the name of the function in the error
# message to avoid confusion.
msg = re.sub(
r"parameter of \w+ must be",
f"parameter of {func.__qualname__} must be",
str(e),
)
raise InvalidParameterError(msg) from e
return wrapper
return decorator
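# Illustrative sketch (not part of the module): a hypothetical function decorated
# with `validate_params`. The constraints and the function name are made up.
#
#     @validate_params(
#         {
#             "n_components": [Interval(Integral, 1, None, closed="left")],
#             "metric": [StrOptions({"euclidean", "cosine"}), callable],
#         },
#         prefer_skip_nested_validation=True,
#     )
#     def embed(X, n_components=2, metric="euclidean"):
#         ...
#
#     embed([[0, 1]], n_components=0)
#     # InvalidParameterError: The 'n_components' parameter of embed must be
#     # an int in the range [1, inf). Got 0 instead.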
class RealNotInt(Real):
"""A type that represents reals that are not instances of int.
Behaves like float, but also works with values extracted from numpy arrays.
isinstance(1, RealNotInt) -> False
isinstance(1.0, RealNotInt) -> True
"""
RealNotInt.register(float)
def _type_name(t):
"""Convert type into human readable string."""
module = t.__module__
qualname = t.__qualname__
if module == "builtins":
return qualname
elif t == Real:
return "float"
elif t == Integral:
return "int"
return f"{module}.{qualname}"
class _Constraint(ABC):
"""Base class for the constraint objects."""
def __init__(self):
self.hidden = False
@abstractmethod
def is_satisfied_by(self, val):
"""Whether or not a value satisfies the constraint.
Parameters
----------
val : object
The value to check.
Returns
-------
is_satisfied : bool
Whether or not the constraint is satisfied by this value.
"""
@abstractmethod
def __str__(self):
"""A human readable representational string of the constraint."""
class _InstancesOf(_Constraint):
"""Constraint representing instances of a given type.
Parameters
----------
type : type
The valid type.
"""
def __init__(self, type):
super().__init__()
self.type = type
def is_satisfied_by(self, val):
return isinstance(val, self.type)
def __str__(self):
return f"an instance of {_type_name(self.type)!r}"
class _NoneConstraint(_Constraint):
"""Constraint representing the None singleton."""
def is_satisfied_by(self, val):
return val is None
def __str__(self):
return "None"
class _NanConstraint(_Constraint):
"""Constraint representing the indicator `np.nan`."""
def is_satisfied_by(self, val):
return (
not isinstance(val, Integral) and isinstance(val, Real) and math.isnan(val)
)
def __str__(self):
return "numpy.nan"
class _PandasNAConstraint(_Constraint):
"""Constraint representing the indicator `pd.NA`."""
def is_satisfied_by(self, val):
try:
import pandas as pd
return isinstance(val, type(pd.NA)) and pd.isna(val)
except ImportError:
return False
def __str__(self):
return "pandas.NA"
class Options(_Constraint):
"""Constraint representing a finite set of instances of a given type.
Parameters
----------
type : type
options : set
The set of valid scalars.
deprecated : set or None, default=None
A subset of the `options` to mark as deprecated in the string
representation of the constraint.
"""
def __init__(self, type, options, *, deprecated=None):
super().__init__()
self.type = type
self.options = options
self.deprecated = deprecated or set()
if self.deprecated - self.options:
raise ValueError("The deprecated options must be a subset of the options.")
def is_satisfied_by(self, val):
return isinstance(val, self.type) and val in self.options
def _mark_if_deprecated(self, option):
"""Add a deprecated mark to an option if needed."""
option_str = f"{option!r}"
if option in self.deprecated:
option_str = f"{option_str} (deprecated)"
return option_str
def __str__(self):
options_str = (
f"{', '.join([self._mark_if_deprecated(o) for o in self.options])}"
)
return f"a {_type_name(self.type)} among {{{options_str}}}"
class StrOptions(Options):
"""Constraint representing a finite set of strings.
Parameters
----------
options : set of str
The set of valid strings.
deprecated : set of str or None, default=None
A subset of the `options` to mark as deprecated in the string
representation of the constraint.
"""
def __init__(self, options, *, deprecated=None):
super().__init__(type=str, options=options, deprecated=deprecated)
class Interval(_Constraint):
"""Constraint representing a typed interval.
Parameters
----------
type : {numbers.Integral, numbers.Real, RealNotInt}
The set of numbers in which to set the interval.
If RealNotInt, only reals that don't have the integer type
are allowed. For example 1.0 is allowed but 1 is not.
left : float or int or None
The left bound of the interval. None means left bound is -∞.
right : float, int or None
The right bound of the interval. None means right bound is +∞.
closed : {"left", "right", "both", "neither"}
Whether the interval is open or closed. Possible choices are:
- `"left"`: the interval is closed on the left and open on the right.
It is equivalent to the interval `[ left, right )`.
- `"right"`: the interval is closed on the right and open on the left.
It is equivalent to the interval `( left, right ]`.
- `"both"`: the interval is closed.
It is equivalent to the interval `[ left, right ]`.
- `"neither"`: the interval is open.
It is equivalent to the interval `( left, right )`.
Notes
-----
Setting a bound to `None` and setting the interval closed is valid. For instance,
strictly speaking, `Interval(Real, 0, None, closed="both")` corresponds to
`[0, +∞) U {+∞}`.
"""
def __init__(self, type, left, right, *, closed):
super().__init__()
self.type = type
self.left = left
self.right = right
self.closed = closed
self._check_params()
def _check_params(self):
if self.type not in (Integral, Real, RealNotInt):
raise ValueError(
"type must be either numbers.Integral, numbers.Real or RealNotInt."
f" Got {self.type} instead."
)
if self.closed not in ("left", "right", "both", "neither"):
raise ValueError(
"closed must be either 'left', 'right', 'both' or 'neither'. "
f"Got {self.closed} instead."
)
if self.type is Integral:
suffix = "for an interval over the integers."
if self.left is not None and not isinstance(self.left, Integral):
raise TypeError(f"Expecting left to be an int {suffix}")
if self.right is not None and not isinstance(self.right, Integral):
raise TypeError(f"Expecting right to be an int {suffix}")
if self.left is None and self.closed in ("left", "both"):
raise ValueError(
f"left can't be None when closed == {self.closed} {suffix}"
)
if self.right is None and self.closed in ("right", "both"):
raise ValueError(
f"right can't be None when closed == {self.closed} {suffix}"
)
else:
if self.left is not None and not isinstance(self.left, Real):
raise TypeError("Expecting left to be a real number.")
if self.right is not None and not isinstance(self.right, Real):
raise TypeError("Expecting right to be a real number.")
if self.right is not None and self.left is not None and self.right <= self.left:
raise ValueError(
f"right can't be less than left. Got left={self.left} and "
f"right={self.right}"
)
def __contains__(self, val):
if not isinstance(val, Integral) and np.isnan(val):
return False
left_cmp = operator.lt if self.closed in ("left", "both") else operator.le
right_cmp = operator.gt if self.closed in ("right", "both") else operator.ge
left = -np.inf if self.left is None else self.left
right = np.inf if self.right is None else self.right
if left_cmp(val, left):
return False
if right_cmp(val, right):
return False
return True
def is_satisfied_by(self, val):
if not isinstance(val, self.type):
return False
return val in self
def __str__(self):
type_str = "an int" if self.type is Integral else "a float"
left_bracket = "[" if self.closed in ("left", "both") else "("
left_bound = "-inf" if self.left is None else self.left
right_bound = "inf" if self.right is None else self.right
right_bracket = "]" if self.closed in ("right", "both") else ")"
# better repr if the bounds were given as integers
if not self.type == Integral and isinstance(self.left, Real):
left_bound = float(left_bound)
if not self.type == Integral and isinstance(self.right, Real):
right_bound = float(right_bound)
return (
f"{type_str} in the range "
f"{left_bracket}{left_bound}, {right_bound}{right_bracket}"
)
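# Illustrative sketch (not part of the module) of the Interval semantics:
#
#     Interval(Integral, 1, 10, closed="both").is_satisfied_by(5)        # True
#     Interval(Integral, 1, 10, closed="both").is_satisfied_by(5.0)      # False (not an int)
#     Interval(RealNotInt, 0, 1, closed="neither").is_satisfied_by(0.5)  # True
#     Interval(RealNotInt, 0, 1, closed="neither").is_satisfied_by(1)    # False (ints excluded)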
class _ArrayLikes(_Constraint):
"""Constraint representing array-likes"""
def is_satisfied_by(self, val):
return _is_arraylike_not_scalar(val)
def __str__(self):
return "an array-like"
class _SparseMatrices(_Constraint):
"""Constraint representing sparse matrices."""
def is_satisfied_by(self, val):
return issparse(val)
def __str__(self):
return "a sparse matrix"
class _Callables(_Constraint):
"""Constraint representing callables."""
def is_satisfied_by(self, val):
return callable(val)
def __str__(self):
return "a callable"
class _RandomStates(_Constraint):
"""Constraint representing random states.
Convenience class for
[Interval(Integral, 0, 2**32 - 1, closed="both"), np.random.RandomState, None]
"""
def __init__(self):
super().__init__()
self._constraints = [
Interval(Integral, 0, 2**32 - 1, closed="both"),
_InstancesOf(np.random.RandomState),
_NoneConstraint(),
]
def is_satisfied_by(self, val):
return any(c.is_satisfied_by(val) for c in self._constraints)
def __str__(self):
return (
f"{', '.join([str(c) for c in self._constraints[:-1]])} or"
f" {self._constraints[-1]}"
)
class _Booleans(_Constraint):
"""Constraint representing boolean likes.
Convenience class for
[bool, np.bool_]
"""
def __init__(self):
super().__init__()
self._constraints = [
_InstancesOf(bool),
_InstancesOf(np.bool_),
]
def is_satisfied_by(self, val):
return any(c.is_satisfied_by(val) for c in self._constraints)
def __str__(self):
return (
f"{', '.join([str(c) for c in self._constraints[:-1]])} or"
f" {self._constraints[-1]}"
)
class _VerboseHelper(_Constraint):
"""Helper constraint for the verbose parameter.
Convenience class for
[Interval(Integral, 0, None, closed="left"), bool, numpy.bool_]
"""
def __init__(self):
super().__init__()
self._constraints = [
Interval(Integral, 0, None, closed="left"),
_InstancesOf(bool),
_InstancesOf(np.bool_),
]
def is_satisfied_by(self, val):
return any(c.is_satisfied_by(val) for c in self._constraints)
def __str__(self):
return (
f"{', '.join([str(c) for c in self._constraints[:-1]])} or"
f" {self._constraints[-1]}"
)
class MissingValues(_Constraint):
"""Helper constraint for the `missing_values` parameters.
Convenience for
[
Integral,
Interval(Real, None, None, closed="both"),
str, # when numeric_only is False
None, # when numeric_only is False
_NanConstraint(),
_PandasNAConstraint(),
]
Parameters
----------
numeric_only : bool, default=False
Whether to consider only numeric missing value markers.
"""
def __init__(self, numeric_only=False):
super().__init__()
self.numeric_only = numeric_only
self._constraints = [
_InstancesOf(Integral),
# we use an interval of Real to ignore np.nan that has its own constraint
Interval(Real, None, None, closed="both"),
_NanConstraint(),
_PandasNAConstraint(),
]
if not self.numeric_only:
self._constraints.extend([_InstancesOf(str), _NoneConstraint()])
def is_satisfied_by(self, val):
return any(c.is_satisfied_by(val) for c in self._constraints)
def __str__(self):
return (
f"{', '.join([str(c) for c in self._constraints[:-1]])} or"
f" {self._constraints[-1]}"
)
class HasMethods(_Constraint):
"""Constraint representing objects that expose specific methods.
It is useful for parameters following a protocol and where we don't want to impose
an affiliation to a specific module or class.
Parameters
----------
methods : str or list of str
The method(s) that the object is expected to expose.
"""
@validate_params(
{"methods": [str, list]},
prefer_skip_nested_validation=True,
)
def __init__(self, methods):
super().__init__()
if isinstance(methods, str):
methods = [methods]
self.methods = methods
def is_satisfied_by(self, val):
return all(callable(getattr(val, method, None)) for method in self.methods)
def __str__(self):
if len(self.methods) == 1:
methods = f"{self.methods[0]!r}"
else:
methods = (
f"{', '.join([repr(m) for m in self.methods[:-1]])} and"
f" {self.methods[-1]!r}"
)
return f"an object implementing {methods}"
class _IterablesNotString(_Constraint):
"""Constraint representing iterables that are not strings."""
def is_satisfied_by(self, val):
return isinstance(val, Iterable) and not isinstance(val, str)
def __str__(self):
return "an iterable"
class _CVObjects(_Constraint):
"""Constraint representing cv objects.
Convenience class for
[
Interval(Integral, 2, None, closed="left"),
HasMethods(["split", "get_n_splits"]),
_IterablesNotString(),
None,
]
"""
def __init__(self):
super().__init__()
self._constraints = [
Interval(Integral, 2, None, closed="left"),
HasMethods(["split", "get_n_splits"]),
_IterablesNotString(),
_NoneConstraint(),
]
def is_satisfied_by(self, val):
return any(c.is_satisfied_by(val) for c in self._constraints)
def __str__(self):
return (
f"{', '.join([str(c) for c in self._constraints[:-1]])} or"
f" {self._constraints[-1]}"
)
class Hidden:
"""Class encapsulating a constraint not meant to be exposed to the user.
Parameters
----------
constraint : str or _Constraint instance
The constraint to be used internally.
"""
def __init__(self, constraint):
self.constraint = constraint
def generate_invalid_param_val(constraint):
"""Return a value that does not satisfy the constraint.
Raises a NotImplementedError if there exists no invalid value for this constraint.
This is only useful for testing purposes.
Parameters
----------
constraint : _Constraint instance
The constraint to generate a value for.
Returns
-------
val : object
A value that does not satisfy the constraint.
"""
if isinstance(constraint, StrOptions):
return f"not {' or '.join(constraint.options)}"
if isinstance(constraint, MissingValues):
return np.array([1, 2, 3])
if isinstance(constraint, _VerboseHelper):
return -1
if isinstance(constraint, HasMethods):
return type("HasNotMethods", (), {})()
if isinstance(constraint, _IterablesNotString):
return "a string"
if isinstance(constraint, _CVObjects):
return "not a cv object"
if isinstance(constraint, Interval) and constraint.type is Integral:
if constraint.left is not None:
return constraint.left - 1
if constraint.right is not None:
return constraint.right + 1
# There's no integer outside (-inf, +inf)
raise NotImplementedError
if isinstance(constraint, Interval) and constraint.type in (Real, RealNotInt):
if constraint.left is not None:
return constraint.left - 1e-6
if constraint.right is not None:
return constraint.right + 1e-6
# bounds are -inf, +inf
if constraint.closed in ("right", "neither"):
return -np.inf
if constraint.closed in ("left", "neither"):
return np.inf
# interval is [-inf, +inf]
return np.nan
raise NotImplementedError
def generate_valid_param(constraint):
"""Return a value that does satisfy a constraint.
This is only useful for testing purposes.
Parameters
----------
constraint : Constraint instance
The constraint to generate a value for.
Returns
-------
val : object
A value that does satisfy the constraint.
"""
if isinstance(constraint, _ArrayLikes):
return np.array([1, 2, 3])
if isinstance(constraint, _SparseMatrices):
return csr_matrix([[0, 1], [1, 0]])
if isinstance(constraint, _RandomStates):
return np.random.RandomState(42)
if isinstance(constraint, _Callables):
return lambda x: x
if isinstance(constraint, _NoneConstraint):
return None
if isinstance(constraint, _InstancesOf):
if constraint.type is np.ndarray:
# special case for ndarray since it can't be instantiated without arguments
return np.array([1, 2, 3])
if constraint.type in (Integral, Real):
# special case for Integral and Real since they are abstract classes
return 1
return constraint.type()
if isinstance(constraint, _Booleans):
return True
if isinstance(constraint, _VerboseHelper):
return 1
if isinstance(constraint, MissingValues) and constraint.numeric_only:
return np.nan
if isinstance(constraint, MissingValues) and not constraint.numeric_only:
return "missing"
if isinstance(constraint, HasMethods):
return type(
"ValidHasMethods", (), {m: lambda self: None for m in constraint.methods}
)()
if isinstance(constraint, _IterablesNotString):
return [1, 2, 3]
if isinstance(constraint, _CVObjects):
return 5
if isinstance(constraint, Options): # includes StrOptions
for option in constraint.options:
return option
if isinstance(constraint, Interval):
interval = constraint
if interval.left is None and interval.right is None:
return 0
elif interval.left is None:
return interval.right - 1
elif interval.right is None:
return interval.left + 1
else:
if interval.type is Real:
return (interval.left + interval.right) / 2
else:
return interval.left + 1
raise ValueError(f"Unknown constraint type: {constraint}")
@@ -0,0 +1,99 @@
import numpy as np
from . import check_consistent_length
from ._optional_dependencies import check_matplotlib_support
from ._response import _get_response_values_binary
from .multiclass import type_of_target
from .validation import _check_pos_label_consistency
class _BinaryClassifierCurveDisplayMixin:
"""Mixin class to be used in Displays requiring a binary classifier.
The aim of this class is to centralize some validations regarding the estimator and
the target and gather the response of the estimator.
"""
def _validate_plot_params(self, *, ax=None, name=None):
check_matplotlib_support(f"{self.__class__.__name__}.plot")
import matplotlib.pyplot as plt
if ax is None:
_, ax = plt.subplots()
name = self.estimator_name if name is None else name
return ax, ax.figure, name
@classmethod
def _validate_and_get_response_values(
cls, estimator, X, y, *, response_method="auto", pos_label=None, name=None
):
check_matplotlib_support(f"{cls.__name__}.from_estimator")
name = estimator.__class__.__name__ if name is None else name
y_pred, pos_label = _get_response_values_binary(
estimator,
X,
response_method=response_method,
pos_label=pos_label,
)
return y_pred, pos_label, name
@classmethod
def _validate_from_predictions_params(
cls, y_true, y_pred, *, sample_weight=None, pos_label=None, name=None
):
check_matplotlib_support(f"{cls.__name__}.from_predictions")
if type_of_target(y_true) != "binary":
raise ValueError(
f"The target y is not binary. Got {type_of_target(y_true)} type of"
" target."
)
check_consistent_length(y_true, y_pred, sample_weight)
pos_label = _check_pos_label_consistency(pos_label, y_true)
name = name if name is not None else "Classifier"
return pos_label, name
def _validate_score_name(score_name, scoring, negate_score):
"""Validate the `score_name` parameter.
If `score_name` is provided, we just return it as-is.
If `score_name` is `None` and `scoring` is `None`, we use `Score` if
`negate_score` is `False` and `Negative score` otherwise.
If `scoring` is a string or a callable, we infer the name. We replace `_` by
spaces and capitalize the first letter. We remove `neg_` and replace it by
`"Negative"` if `negate_score` is `False` or just remove it otherwise.
"""
if score_name is not None:
return score_name
elif scoring is None:
return "Negative score" if negate_score else "Score"
else:
score_name = scoring.__name__ if callable(scoring) else scoring
if negate_score:
if score_name.startswith("neg_"):
score_name = score_name[4:]
else:
score_name = f"Negative {score_name}"
elif score_name.startswith("neg_"):
score_name = f"Negative {score_name[4:]}"
score_name = score_name.replace("_", " ")
return score_name.capitalize()
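# Illustrative sketch (not part of the module) of the naming rules above:
#
#     _validate_score_name(None, None, negate_score=False)                        # "Score"
#     _validate_score_name(None, "neg_mean_absolute_error", negate_score=True)    # "Mean absolute error"
#     _validate_score_name(None, "accuracy", negate_score=True)                   # "Negative accuracy"
#     _validate_score_name("MAE", "neg_mean_absolute_error", negate_score=False)  # "MAE"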
def _interval_max_min_ratio(data):
"""Compute the ratio between the largest and smallest inter-point distances.
A value larger than 5 typically indicates that the parameter range would
better be displayed with a log scale while a linear scale would be more
suitable otherwise.
"""
diff = np.diff(np.sort(data))
return diff.max() / diff.min()
@@ -0,0 +1,463 @@
"""This module contains the _EstimatorPrettyPrinter class used in
BaseEstimator.__repr__ for pretty-printing estimators"""
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation;
# All Rights Reserved
# Authors: Fred L. Drake, Jr. <fdrake@acm.org> (built-in CPython pprint module)
# Nicolas Hug (scikit-learn specific changes)
# License: PSF License version 2 (see below)
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
# --------------------------------------------
# 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"),
# and the Individual or Organization ("Licensee") accessing and otherwise
# using this software ("Python") in source or binary form and its associated
# documentation.
# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
# grants Licensee a nonexclusive, royalty-free, world-wide license to
# reproduce, analyze, test, perform and/or display publicly, prepare
# derivative works, distribute, and otherwise use Python alone or in any
# derivative version, provided, however, that PSF's License Agreement and
# PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004,
# 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
# 2017, 2018 Python Software Foundation; All Rights Reserved" are retained in
# Python alone or in any derivative version prepared by Licensee.
# 3. In the event Licensee prepares a derivative work that is based on or
# incorporates Python or any part thereof, and wants to make the derivative
# work available to others as provided herein, then Licensee hereby agrees to
# include in any such work a brief summary of the changes made to Python.
# 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES
# NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT
# NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF
# MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
# PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY
# INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
# MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE
# THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
# 6. This License Agreement will automatically terminate upon a material
# breach of its terms and conditions.
# 7. Nothing in this License Agreement shall be deemed to create any
# relationship of agency, partnership, or joint venture between PSF and
# Licensee. This License Agreement does not grant permission to use PSF
# trademarks or trade name in a trademark sense to endorse or promote products
# or services of Licensee, or any third party.
# 8. By copying, installing or otherwise using Python, Licensee agrees to be
# bound by the terms and conditions of this License Agreement.
# Brief summary of changes to original code:
# - "compact" parameter is supported for dicts, not just lists or tuples
# - estimators have a custom handler, they're not just treated as objects
# - long sequences (lists, tuples, dict items) with more than N elements are
# shortened using ellipsis (', ...') at the end.
import inspect
import pprint
from collections import OrderedDict
from .._config import get_config
from ..base import BaseEstimator
from ._missing import is_scalar_nan
class KeyValTuple(tuple):
"""Dummy class for correctly rendering key-value tuples from dicts."""
def __repr__(self):
# needed for _dispatch[tuple.__repr__] not to be overridden
return super().__repr__()
class KeyValTupleParam(KeyValTuple):
"""Dummy class for correctly rendering key-value tuples from parameters."""
pass
def _changed_params(estimator):
"""Return dict (param_name: value) of parameters that were given to
estimator with non-default values."""
params = estimator.get_params(deep=False)
init_func = getattr(estimator.__init__, "deprecated_original", estimator.__init__)
init_params = inspect.signature(init_func).parameters
init_params = {name: param.default for name, param in init_params.items()}
def has_changed(k, v):
if k not in init_params: # happens if k is part of a **kwargs
return True
if init_params[k] == inspect._empty: # k has no default value
return True
# try to avoid calling repr on nested estimators
if isinstance(v, BaseEstimator) and v.__class__ != init_params[k].__class__:
return True
# Use repr as a last resort. It may be expensive.
if repr(v) != repr(init_params[k]) and not (
is_scalar_nan(init_params[k]) and is_scalar_nan(v)
):
return True
return False
return {k: v for k, v in params.items() if has_changed(k, v)}
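# Illustrative sketch (not part of the module): only parameters that differ from
# their defaults are kept (LogisticRegression's default C is 1.0).
#
#     from sklearn.linear_model import LogisticRegression
#     _changed_params(LogisticRegression(C=10.0))   # -> {'C': 10.0}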
class _EstimatorPrettyPrinter(pprint.PrettyPrinter):
"""Pretty Printer class for estimator objects.
This extends the pprint.PrettyPrinter class, because:
- we need estimators to be printed with their parameters, e.g.
Estimator(param1=value1, ...) which is not supported by default.
- the 'compact' parameter of PrettyPrinter is ignored for dicts, which
may lead to very long representations that we want to avoid.
Quick overview of pprint.PrettyPrinter (see also
https://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):
- the entry point is the _format() method which calls format() (overridden
here)
- format() directly calls _safe_repr() for a first try at rendering the
object
- _safe_repr formats the whole object recursively, only calling itself,
not caring about line length or anything
- back to _format(), if the output string is too long, _format() then calls
the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on
the type of the object. This is where the line length and the compact
parameters are taken into account.
- those _pprint_TYPE() methods will internally use the format() method for
rendering the nested objects of an object (e.g. the elements of a list)
In the end, everything has to be implemented twice: in _safe_repr and in
the custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not
straightforward to extend (especially when we want a compact output), so
the code is a bit convoluted.
This class overrides:
- format() to support the changed_only parameter
- _safe_repr to support printing of estimators (for when they fit on a
single line)
- _format_dict_items so that dict are correctly 'compacted'
- _format_items so that ellipsis is used on long lists and tuples
When estimators cannot be printed on a single line, the builtin _format()
will call _pprint_estimator() because it was registered to do so (see
_dispatch[BaseEstimator.__repr__] = _pprint_estimator).
Both _format_dict_items() and _pprint_estimator() use the
_format_params_or_dict_items() method that will format parameters and
key-value pairs respecting the compact parameter. This method needs another
subroutine _pprint_key_val_tuple() used when a parameter or a key-value
pair is too long to fit on a single line. This subroutine is called in
_format() and is registered as well in the _dispatch dict (just like
_pprint_estimator). We had to create the two classes KeyValTuple and
KeyValTupleParam for this.
"""
def __init__(
self,
indent=1,
width=80,
depth=None,
stream=None,
*,
compact=False,
indent_at_name=True,
n_max_elements_to_show=None,
):
super().__init__(indent, width, depth, stream, compact=compact)
self._indent_at_name = indent_at_name
if self._indent_at_name:
self._indent_per_level = 1 # ignore indent param
self._changed_only = get_config()["print_changed_only"]
# Max number of elements in a list, dict, tuple until we start using
# ellipsis. This also affects the number of arguments of an estimator
# (they are treated as dicts)
self.n_max_elements_to_show = n_max_elements_to_show
def format(self, object, context, maxlevels, level):
return _safe_repr(
object, context, maxlevels, level, changed_only=self._changed_only
)
def _pprint_estimator(self, object, stream, indent, allowance, context, level):
stream.write(object.__class__.__name__ + "(")
if self._indent_at_name:
indent += len(object.__class__.__name__)
if self._changed_only:
params = _changed_params(object)
else:
params = object.get_params(deep=False)
params = OrderedDict((name, val) for (name, val) in sorted(params.items()))
self._format_params(
params.items(), stream, indent, allowance + 1, context, level
)
stream.write(")")
def _format_dict_items(self, items, stream, indent, allowance, context, level):
return self._format_params_or_dict_items(
items, stream, indent, allowance, context, level, is_dict=True
)
def _format_params(self, items, stream, indent, allowance, context, level):
return self._format_params_or_dict_items(
items, stream, indent, allowance, context, level, is_dict=False
)
def _format_params_or_dict_items(
self, object, stream, indent, allowance, context, level, is_dict
):
"""Format dict items or parameters respecting the compact=True
parameter. For some reason, the builtin rendering of dict items doesn't
respect compact=True and will use one line per key-value if all cannot
fit in a single line.
Dict items will be rendered as <'key': value> while params will be
rendered as <key=value>. The implementation is mostly copy/pasting from
the builtin _format_items().
This also adds ellipsis if the number of items is greater than
self.n_max_elements_to_show.
"""
write = stream.write
indent += self._indent_per_level
delimnl = ",\n" + " " * indent
delim = ""
width = max_width = self._width - indent + 1
it = iter(object)
try:
next_ent = next(it)
except StopIteration:
return
last = False
n_items = 0
while not last:
if n_items == self.n_max_elements_to_show:
write(", ...")
break
n_items += 1
ent = next_ent
try:
next_ent = next(it)
except StopIteration:
last = True
max_width -= allowance
width -= allowance
if self._compact:
k, v = ent
krepr = self._repr(k, context, level)
vrepr = self._repr(v, context, level)
if not is_dict:
krepr = krepr.strip("'")
middle = ": " if is_dict else "="
rep = krepr + middle + vrepr
w = len(rep) + 2
if width < w:
width = max_width
if delim:
delim = delimnl
if width >= w:
width -= w
write(delim)
delim = ", "
write(rep)
continue
write(delim)
delim = delimnl
class_ = KeyValTuple if is_dict else KeyValTupleParam
self._format(
class_(ent), stream, indent, allowance if last else 1, context, level
)
def _format_items(self, items, stream, indent, allowance, context, level):
"""Format the items of an iterable (list, tuple...). Same as the
built-in _format_items, with support for ellipsis if the number of
elements is greater than self.n_max_elements_to_show.
"""
write = stream.write
indent += self._indent_per_level
if self._indent_per_level > 1:
write((self._indent_per_level - 1) * " ")
delimnl = ",\n" + " " * indent
delim = ""
width = max_width = self._width - indent + 1
it = iter(items)
try:
next_ent = next(it)
except StopIteration:
return
last = False
n_items = 0
while not last:
if n_items == self.n_max_elements_to_show:
write(", ...")
break
n_items += 1
ent = next_ent
try:
next_ent = next(it)
except StopIteration:
last = True
max_width -= allowance
width -= allowance
if self._compact:
rep = self._repr(ent, context, level)
w = len(rep) + 2
if width < w:
width = max_width
if delim:
delim = delimnl
if width >= w:
width -= w
write(delim)
delim = ", "
write(rep)
continue
write(delim)
delim = delimnl
self._format(ent, stream, indent, allowance if last else 1, context, level)
def _pprint_key_val_tuple(self, object, stream, indent, allowance, context, level):
"""Pretty printing for key-value tuples from dict or parameters."""
k, v = object
rep = self._repr(k, context, level)
if isinstance(object, KeyValTupleParam):
rep = rep.strip("'")
middle = "="
else:
middle = ": "
stream.write(rep)
stream.write(middle)
self._format(
v, stream, indent + len(rep) + len(middle), allowance, context, level
)
# Note: need to copy _dispatch to prevent instances of the builtin
# PrettyPrinter class to call methods of _EstimatorPrettyPrinter (see issue
# 12906)
# mypy error: "Type[PrettyPrinter]" has no attribute "_dispatch"
_dispatch = pprint.PrettyPrinter._dispatch.copy() # type: ignore
_dispatch[BaseEstimator.__repr__] = _pprint_estimator
_dispatch[KeyValTuple.__repr__] = _pprint_key_val_tuple
def _safe_repr(object, context, maxlevels, level, changed_only=False):
"""Same as the builtin _safe_repr, with added support for Estimator
objects."""
typ = type(object)
if typ in pprint._builtin_scalars:
return repr(object), True, False
r = getattr(typ, "__repr__", None)
if issubclass(typ, dict) and r is dict.__repr__:
if not object:
return "{}", True, False
objid = id(object)
if maxlevels and level >= maxlevels:
return "{...}", False, objid in context
if objid in context:
return pprint._recursion(object), False, True
context[objid] = 1
readable = True
recursive = False
components = []
append = components.append
level += 1
saferepr = _safe_repr
items = sorted(object.items(), key=pprint._safe_tuple)
for k, v in items:
krepr, kreadable, krecur = saferepr(
k, context, maxlevels, level, changed_only=changed_only
)
vrepr, vreadable, vrecur = saferepr(
v, context, maxlevels, level, changed_only=changed_only
)
append("%s: %s" % (krepr, vrepr))
readable = readable and kreadable and vreadable
if krecur or vrecur:
recursive = True
del context[objid]
return "{%s}" % ", ".join(components), readable, recursive
if (issubclass(typ, list) and r is list.__repr__) or (
issubclass(typ, tuple) and r is tuple.__repr__
):
if issubclass(typ, list):
if not object:
return "[]", True, False
format = "[%s]"
elif len(object) == 1:
format = "(%s,)"
else:
if not object:
return "()", True, False
format = "(%s)"
objid = id(object)
if maxlevels and level >= maxlevels:
return format % "...", False, objid in context
if objid in context:
return pprint._recursion(object), False, True
context[objid] = 1
readable = True
recursive = False
components = []
append = components.append
level += 1
for o in object:
orepr, oreadable, orecur = _safe_repr(
o, context, maxlevels, level, changed_only=changed_only
)
append(orepr)
if not oreadable:
readable = False
if orecur:
recursive = True
del context[objid]
return format % ", ".join(components), readable, recursive
if issubclass(typ, BaseEstimator):
objid = id(object)
if maxlevels and level >= maxlevels:
return "{...}", False, objid in context
if objid in context:
return pprint._recursion(object), False, True
context[objid] = 1
readable = True
recursive = False
if changed_only:
params = _changed_params(object)
else:
params = object.get_params(deep=False)
components = []
append = components.append
level += 1
saferepr = _safe_repr
items = sorted(params.items(), key=pprint._safe_tuple)
for k, v in items:
krepr, kreadable, krecur = saferepr(
k, context, maxlevels, level, changed_only=changed_only
)
vrepr, vreadable, vrecur = saferepr(
v, context, maxlevels, level, changed_only=changed_only
)
append("%s=%s" % (krepr.strip("'"), vrepr))
readable = readable and kreadable and vreadable
if krecur or vrecur:
recursive = True
del context[objid]
return ("%s(%s)" % (typ.__name__, ", ".join(components)), readable, recursive)
rep = repr(object)
return rep, (rep and not rep.startswith("<")), False
@@ -0,0 +1,35 @@
# Authors: Arnaud Joly
#
# License: BSD 3 clause
from ._typedefs cimport uint32_t
cdef inline uint32_t DEFAULT_SEED = 1
cdef enum:
# Max value for our rand_r replacement (near the bottom).
# We don't use RAND_MAX because it's different across platforms and
# particularly tiny on Windows/MSVC.
# It corresponds to the maximum representable value for
# 32-bit signed integers (i.e. 2^31 - 1).
RAND_R_MAX = 2147483647
# rand_r replacement using a 32bit XorShift generator
# See http://www.jstatsoft.org/v08/i14/paper for details
cdef inline uint32_t our_rand_r(uint32_t* seed) nogil:
"""Generate a pseudo-random np.uint32 from a np.uint32 seed"""
# seed shouldn't ever be 0.
if (seed[0] == 0):
seed[0] = DEFAULT_SEED
seed[0] ^= <uint32_t>(seed[0] << 13)
seed[0] ^= <uint32_t>(seed[0] >> 17)
seed[0] ^= <uint32_t>(seed[0] << 5)
# Use the modulo to make sure that we don't return a value greater than the
# maximum representable value for signed 32bit integers (i.e. 2^31 - 1).
# Note that the parenthesis are needed to avoid overflow: here
# RAND_R_MAX is cast to uint32_t before 1 is added.
return seed[0] % ((<uint32_t>RAND_R_MAX) + 1)
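# Illustrative pure-Python sketch (not part of this header) of the same xorshift
# update; `py_rand_r` is a hypothetical name, and masking emulates uint32 wrap-around:
#
#     def py_rand_r(seed):
#         seed = seed or 1                     # the state must never be 0
#         seed ^= (seed << 13) & 0xFFFFFFFF
#         seed ^= seed >> 17
#         seed ^= (seed << 5) & 0xFFFFFFFF
#         return seed % (2**31), seed          # value in [0, RAND_R_MAX], new state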
@@ -0,0 +1,355 @@
# Author: Arnaud Joly
#
# License: BSD 3 clause
"""
Random utility function
=======================
This module complements missing features of ``numpy.random``.
The module contains:
* Several algorithms to sample integers without replacement.
* Fast rand_r alternative based on xor shifts
"""
import numpy as np
from . import check_random_state
from ._typedefs cimport intp_t
cdef uint32_t DEFAULT_SEED = 1
# Compatibility type to always accept the default int type used by NumPy, both
# before and after NumPy 2. On Windows, `long` does not always match `intp_t`.
# See the comments in the `sample_without_replacement` Python function for more
# details.
ctypedef fused default_int:
intp_t
long
cpdef _sample_without_replacement_check_input(default_int n_population,
default_int n_samples):
""" Check that input are consistent for sample_without_replacement"""
if n_population < 0:
raise ValueError('n_population should be non-negative, got %s.'
% n_population)
if n_samples > n_population:
raise ValueError('n_population should be greater than or equal to '
'n_samples, got n_samples > n_population (%s > %s)'
% (n_samples, n_population))
cpdef _sample_without_replacement_with_tracking_selection(
default_int n_population,
default_int n_samples,
random_state=None):
r"""Sample integers without replacement.
Select n_samples integers from the set [0, n_population) without
replacement.
Time complexity:
- Worst-case: unbounded
- Average-case:
O(O(np.random.randint) * \sum_{i=1}^{n_samples} 1 /
(1 - i / n_population))
<= O(O(np.random.randint) *
n_population * ln((n_population - 2)
/(n_population - 1 - n_samples)))
<= O(O(np.random.randint) *
n_population * 1 / (1 - n_samples / n_population))
Space complexity of O(n_samples) in a python set.
Parameters
----------
n_population : int
The size of the set to sample from.
n_samples : int
The number of integers to sample.
random_state : int, RandomState instance or None, default=None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
Returns
-------
out : ndarray of shape (n_samples,)
The sampled subset of integers.
"""
_sample_without_replacement_check_input(n_population, n_samples)
cdef default_int i
cdef default_int j
cdef default_int[::1] out = np.empty((n_samples, ), dtype=int)
rng = check_random_state(random_state)
rng_randint = rng.randint
# The following lines of code are heavily inspired by Python core,
# more precisely by random.sample.
cdef set selected = set()
for i in range(n_samples):
j = rng_randint(n_population)
while j in selected:
j = rng_randint(n_population)
selected.add(j)
out[i] = j
return np.asarray(out)
cpdef _sample_without_replacement_with_pool(default_int n_population,
default_int n_samples,
random_state=None):
"""Sample integers without replacement.
Select n_samples integers from the set [0, n_population) without
replacement.
Time complexity: O(n_population + O(np.random.randint) * n_samples)
Space complexity of O(n_population + n_samples).
Parameters
----------
n_population : int
The size of the set to sample from.
n_samples : int
The number of integers to sample.
random_state : int, RandomState instance or None, default=None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
Returns
-------
out : ndarray of shape (n_samples,)
The sampled subset of integers.
"""
_sample_without_replacement_check_input(n_population, n_samples)
cdef default_int i
cdef default_int j
cdef default_int[::1] out = np.empty((n_samples,), dtype=int)
cdef default_int[::1] pool = np.empty((n_population,), dtype=int)
rng = check_random_state(random_state)
rng_randint = rng.randint
# Initialize the pool
for i in range(n_population):
pool[i] = i
# The following lines of code are heavily inspired by Python core,
# more precisely by random.sample.
for i in range(n_samples):
j = rng_randint(n_population - i) # invariant: non-selected at [0,n-i)
out[i] = pool[j]
pool[j] = pool[n_population - i - 1] # move non-selected item into vacancy
return np.asarray(out)
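# Illustrative pure-Python sketch of the pool-based loop above (not used by the
# library; the helper name below is hypothetical). `rng` is assumed to be a numpy
# RandomState, e.g. obtained from check_random_state.
def _pool_sampling_sketch_py(n_population, n_samples, rng):
    pool = list(range(n_population))
    out = []
    for i in range(n_samples):
        # pick among the still-unselected prefix pool[0:n_population - i)
        j = rng.randint(n_population - i)
        out.append(pool[j])
        # move the last unselected item into the vacated slot so the prefix
        # keeps holding unselected items only
        pool[j] = pool[n_population - i - 1]
    return out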
cpdef _sample_without_replacement_with_reservoir_sampling(
default_int n_population,
default_int n_samples,
random_state=None
):
"""Sample integers without replacement.
Select n_samples integers from the set [0, n_population) without
replacement.
Time complexity of
O((n_population - n_samples) * O(np.random.randint) + n_samples)
Space complexity of O(n_samples)
Parameters
----------
n_population : int
The size of the set to sample from.
n_samples : int
The number of integers to sample.
random_state : int, RandomState instance or None, default=None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
Returns
-------
out : ndarray of shape (n_samples,)
The sampled subset of integers. The order of the items is not
necessarily random. Use a random permutation of the array if the order
of the items has to be randomized.
"""
_sample_without_replacement_check_input(n_population, n_samples)
cdef default_int i
cdef default_int j
cdef default_int[::1] out = np.empty((n_samples, ), dtype=int)
rng = check_random_state(random_state)
rng_randint = rng.randint
# This cython implementation is based on the one of Robert Kern:
# http://mail.scipy.org/pipermail/numpy-discussion/2010-December/
# 054289.html
#
for i in range(n_samples):
out[i] = i
for i from n_samples <= i < n_population:
j = rng_randint(0, i + 1)
if j < n_samples:
out[j] = i
return np.asarray(out)
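# Illustrative pure-Python sketch of the reservoir scheme above (not used by the
# library; the helper name below is hypothetical). The first n_samples integers
# seed the reservoir, and each later integer i overwrites a random slot with
# probability n_samples / (i + 1), which keeps the selection uniform.
def _reservoir_sampling_sketch_py(n_population, n_samples, rng):
    out = list(range(n_samples))
    for i in range(n_samples, n_population):
        j = rng.randint(0, i + 1)
        if j < n_samples:
            out[j] = i
    return out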
cdef _sample_without_replacement(default_int n_population,
default_int n_samples,
method="auto",
random_state=None):
"""Sample integers without replacement.
Private function for the implementation, see sample_without_replacement
documentation for more details.
"""
_sample_without_replacement_check_input(n_population, n_samples)
all_methods = ("auto", "tracking_selection", "reservoir_sampling", "pool")
ratio = <double> n_samples / n_population if n_population != 0.0 else 1.0
# Check ratio and use permutation unless ratio < 0.01 or ratio > 0.99
if method == "auto" and ratio > 0.01 and ratio < 0.99:
rng = check_random_state(random_state)
return rng.permutation(n_population)[:n_samples]
if method == "auto" or method == "tracking_selection":
# TODO the pool based method can also be used.
# however, it requires special benchmark to take into account
# the memory requirement of the array vs the set.
# The value 0.2 has been determined through benchmarking.
if ratio < 0.2:
return _sample_without_replacement_with_tracking_selection(
n_population, n_samples, random_state)
else:
return _sample_without_replacement_with_reservoir_sampling(
n_population, n_samples, random_state)
elif method == "reservoir_sampling":
return _sample_without_replacement_with_reservoir_sampling(
n_population, n_samples, random_state)
elif method == "pool":
return _sample_without_replacement_with_pool(n_population, n_samples,
random_state)
else:
raise ValueError('Expected a method name in %s, got %s. '
% (all_methods, method))
def sample_without_replacement(
object n_population, object n_samples, method="auto", random_state=None):
"""Sample integers without replacement.
Select n_samples integers from the set [0, n_population) without
replacement.
Parameters
----------
n_population : int
The size of the set to sample from.
n_samples : int
The number of integers to sample.
random_state : int, RandomState instance or None, default=None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
method : {"auto", "tracking_selection", "reservoir_sampling", "pool"}, \
default='auto'
If method == "auto", the ratio of n_samples / n_population is used
to determine which algorithm to use:
If ratio is between 0 and 0.01, tracking selection is used.
If ratio is between 0.01 and 0.99, numpy.random.permutation is used.
If ratio is greater than 0.99, reservoir sampling is used.
The order of the selected integers is undefined. If a random order is
desired, the selected subset should be shuffled.
If method =="tracking_selection", a set based implementation is used
which is suitable for `n_samples` <<< `n_population`.
If method == "reservoir_sampling", a reservoir sampling algorithm is
used which is suitable for high memory constraint or when
O(`n_samples`) ~ O(`n_population`).
The order of the selected integers is undefined. If a random order is
desired, the selected subset should be shuffled.
If method == "pool", a pool based algorithm is particularly fast, even
faster than the tracking selection method. However, a vector containing
the entire population has to be initialized.
If n_samples ~ n_population, the reservoir sampling method is faster.
Returns
-------
out : ndarray of shape (n_samples,)
The sampled subset of integers. The subset of selected integers might
not be randomized, see the method argument.
Examples
--------
>>> from sklearn.utils.random import sample_without_replacement
>>> sample_without_replacement(10, 5, random_state=42)
array([8, 1, 5, 0, 7])
"""
cdef:
intp_t n_pop_intp, n_samples_intp
long n_pop_long, n_samples_long
# On most platforms `np.int_ is np.intp`. However, before NumPy 2 the
# default integer `np.int_` was a long which is 32bit on 64bit windows
# while `intp` is 64bit on 64bit platforms and 32bit on 32bit ones.
if np.int_ is np.intp:
# Branch always taken on NumPy >=2 (or when not on 64bit windows).
# Cython has different rules for conversion of values to integers.
# For NumPy <1.26.2 AND Cython 3, this first branch requires `int()`
# called explicitly to allow e.g. floats.
n_pop_intp = int(n_population)
n_samples_intp = int(n_samples)
return _sample_without_replacement(
n_pop_intp, n_samples_intp, method, random_state)
else:
# Branch taken on 64bit windows with Numpy<2.0 where `long` is 32bit
n_pop_long = n_population
n_samples_long = n_samples
return _sample_without_replacement(
n_pop_long, n_samples_long, method, random_state)
def _our_rand_r_py(seed):
"""Python utils to test the our_rand_r function"""
cdef uint32_t my_seed = seed
return our_rand_r(&my_seed)
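# Usage note (illustrative only): the heuristic described in
# sample_without_replacement's docstring can be bypassed by passing `method`
# explicitly, e.g.
#
#     from sklearn.utils.random import sample_without_replacement
#     sample_without_replacement(1_000_000, 10, method="tracking_selection")
#     sample_without_replacement(1_000, 900, method="reservoir_sampling")
#     sample_without_replacement(1_000, 500, method="pool")
#
# All methods return n_samples distinct integers from [0, n_population); only
# the ordering guarantees and the time/memory trade-offs differ.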
@@ -0,0 +1,314 @@
"""Utilities to get the response values of a classifier or a regressor.
It allows us to make uniform checks and validation.
"""
import numpy as np
from ..base import is_classifier
from .multiclass import type_of_target
from .validation import _check_response_method, check_is_fitted
def _process_predict_proba(*, y_pred, target_type, classes, pos_label):
"""Get the response values when the response method is `predict_proba`.
This function processes the `y_pred` array in the binary and multi-label cases.
In the binary case, it selects the column corresponding to the positive
class. In the multi-label case, it stacks the predictions if they are not
in the "compressed" format `(n_samples, n_outputs)`.
Parameters
----------
y_pred : ndarray
Output of `estimator.predict_proba`. The shape depends on the target type:
- for binary classification, it is a 2d array of shape `(n_samples, 2)`;
- for multiclass classification, it is a 2d array of shape
`(n_samples, n_classes)`;
- for multilabel classification, it is either a list of 2d arrays of shape
`(n_samples, 2)` (e.g. `RandomForestClassifier` or `KNeighborsClassifier`) or
an array of shape `(n_samples, n_outputs)` (e.g. `MLPClassifier` or
`RidgeClassifier`).
target_type : {"binary", "multiclass", "multilabel-indicator"}
Type of the target.
classes : ndarray of shape (n_classes,) or list of such arrays
Class labels as reported by `estimator.classes_`.
pos_label : int, float, bool or str
Only used with binary and multiclass targets.
Returns
-------
y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \
(n_samples, n_output)
Compressed predictions format as requested by the metrics.
"""
if target_type == "binary" and y_pred.shape[1] < 2:
# We don't handle classifiers trained on a single class.
raise ValueError(
f"Got predict_proba of shape {y_pred.shape}, but need "
"classifier with two classes."
)
if target_type == "binary":
col_idx = np.flatnonzero(classes == pos_label)[0]
return y_pred[:, col_idx]
elif target_type == "multilabel-indicator":
# Use a compressed format of shape `(n_samples, n_outputs)`.
# Only `MLPClassifier` and `RidgeClassifier` return an array of shape
# `(n_samples, n_outputs)`.
if isinstance(y_pred, list):
# list of arrays of shape `(n_samples, 2)`
return np.vstack([p[:, -1] for p in y_pred]).T
else:
# array of shape `(n_samples, n_outputs)`
return y_pred
return y_pred
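# Illustrative example (values computed by hand, shown as comments only): for a
# binary problem with classes [0, 1] and pos_label=1, the helper keeps the
# probability column of the positive class.
#
#     >>> import numpy as np
#     >>> proba = np.array([[0.8, 0.2], [0.3, 0.7]])
#     >>> _process_predict_proba(y_pred=proba, target_type="binary",
#     ...                        classes=np.array([0, 1]), pos_label=1)
#     array([0.2, 0.7])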
def _process_decision_function(*, y_pred, target_type, classes, pos_label):
"""Get the response values when the response method is `decision_function`.
This function processes the `y_pred` array in the binary and multi-label cases.
In the binary case, it inverts the sign of the score if the positive label
is not `classes[1]`. In the multi-label case, it stacks the predictions if
they are not in the "compressed" format `(n_samples, n_outputs)`.
Parameters
----------
y_pred : ndarray
Output of `estimator.decision_function`. The shape depends on the target type:
- for binary classification, it is a 1d array of shape `(n_samples,)` where the
sign is assuming that `classes[1]` is the positive class;
- for multiclass classification, it is a 2d array of shape
`(n_samples, n_classes)`;
- for multilabel classification, it is a 2d array of shape `(n_samples,
n_outputs)`.
target_type : {"binary", "multiclass", "multilabel-indicator"}
Type of the target.
classes : ndarray of shape (n_classes,) or list of such arrays
Class labels as reported by `estimator.classes_`.
pos_label : int, float, bool or str
Only used with binary and multiclass targets.
Returns
-------
y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \
(n_samples, n_output)
Compressed predictions format as requested by the metrics.
"""
if target_type == "binary" and pos_label == classes[0]:
return -1 * y_pred
return y_pred
def _get_response_values(
estimator,
X,
response_method,
pos_label=None,
return_response_method_used=False,
):
"""Compute the response values of a classifier, an outlier detector, or a regressor.
The response values are predictions such that it follows the following shape:
- for binary classification, it is a 1d array of shape `(n_samples,)`;
- for multiclass classification, it is a 2d array of shape `(n_samples, n_classes)`;
- for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
- for outlier detection, it is a 1d array of shape `(n_samples,)`;
- for regression, it is a 1d array of shape `(n_samples,)`.
If `estimator` is a binary classifier, also return the label for the
effective positive class.
This utility is used primarily in the displays and the scikit-learn scorers.
.. versionadded:: 1.3
Parameters
----------
estimator : estimator instance
Fitted classifier, outlier detector, or regressor or a
fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
classifier, an outlier detector, or a regressor.
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input values.
response_method : {"predict_proba", "predict_log_proba", "decision_function", \
"predict"} or list of such str
Specifies the response method to use to get predictions from an estimator
(i.e. :term:`predict_proba`, :term:`predict_log_proba`,
:term:`decision_function` or :term:`predict`). Possible choices are:
- if `str`, it corresponds to the name of the method to return;
- if a list of `str`, it provides the method names in order of
preference. The method used corresponds to the first method in
the list that is implemented by `estimator`.
pos_label : int, float, bool or str, default=None
The class considered as the positive class when computing
the metrics. If `None` and target is 'binary', `estimator.classes_[1]` is
considered as the positive class.
return_response_method_used : bool, default=False
Whether to return the response method used to compute the response
values.
.. versionadded:: 1.4
Returns
-------
y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \
(n_samples, n_outputs)
Target scores calculated from the provided `response_method`
and `pos_label`.
pos_label : int, float, bool, str or None
The class considered as the positive class when computing
the metrics. Returns `None` if `estimator` is a regressor or an outlier
detector.
response_method_used : str
The response method used to compute the response values. Only returned
if `return_response_method_used` is `True`.
.. versionadded:: 1.4
Raises
------
ValueError
If `pos_label` is not a valid label.
If the shape of `y_pred` is not consistent for binary classifier.
If the response method can be applied to a classifier only and
`estimator` is a regressor.
"""
from sklearn.base import is_classifier, is_outlier_detector # noqa
if is_classifier(estimator):
prediction_method = _check_response_method(estimator, response_method)
classes = estimator.classes_
target_type = type_of_target(classes)
if target_type in ("binary", "multiclass"):
if pos_label is not None and pos_label not in classes.tolist():
raise ValueError(
f"pos_label={pos_label} is not a valid label: It should be "
f"one of {classes}"
)
elif pos_label is None and target_type == "binary":
pos_label = classes[-1]
y_pred = prediction_method(X)
if prediction_method.__name__ in ("predict_proba", "predict_log_proba"):
y_pred = _process_predict_proba(
y_pred=y_pred,
target_type=target_type,
classes=classes,
pos_label=pos_label,
)
elif prediction_method.__name__ == "decision_function":
y_pred = _process_decision_function(
y_pred=y_pred,
target_type=target_type,
classes=classes,
pos_label=pos_label,
)
elif is_outlier_detector(estimator):
prediction_method = _check_response_method(estimator, response_method)
y_pred, pos_label = prediction_method(X), None
else: # estimator is a regressor
if response_method != "predict":
raise ValueError(
f"{estimator.__class__.__name__} should either be a classifier to be "
f"used with response_method={response_method} or the response_method "
"should be 'predict'. Got a regressor with response_method="
f"{response_method} instead."
)
prediction_method = estimator.predict
y_pred, pos_label = prediction_method(X), None
if return_response_method_used:
return y_pred, pos_label, prediction_method.__name__
return y_pred, pos_label
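# Illustrative usage sketch (assumes a fitted binary classifier; the dataset and
# estimator choice here are arbitrary):
#
#     >>> from sklearn.datasets import make_classification
#     >>> from sklearn.linear_model import LogisticRegression
#     >>> X, y = make_classification(random_state=0)
#     >>> clf = LogisticRegression().fit(X, y)
#     >>> y_score, pos_label = _get_response_values(
#     ...     clf, X, response_method=["decision_function", "predict_proba"])
#     >>> y_score.shape   # decision_function exists, so it is used first
#     (100,)
#     >>> int(pos_label)  # defaults to classes_[-1] for binary targets
#     1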
def _get_response_values_binary(
estimator, X, response_method, pos_label=None, return_response_method_used=False
):
"""Compute the response values of a binary classifier.
Parameters
----------
estimator : estimator instance
Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
in which the last estimator is a binary classifier.
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input values.
response_method : {'auto', 'predict_proba', 'decision_function'}
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. If set to 'auto',
:term:`predict_proba` is tried first and if it does not exist
:term:`decision_function` is tried next.
pos_label : int, float, bool or str, default=None
The class considered as the positive class when computing
the metrics. By default, `estimator.classes_[1]` is
considered as the positive class.
return_response_method_used : bool, default=False
Whether to return the response method used to compute the response
values.
.. versionadded:: 1.5
Returns
-------
y_pred : ndarray of shape (n_samples,)
Target scores calculated from the provided response_method
and pos_label.
pos_label : int, float, bool or str
The class considered as the positive class when computing
the metrics.
response_method_used : str
The response method used to compute the response values. Only returned
if `return_response_method_used` is `True`.
.. versionadded:: 1.5
"""
classification_error = "Expected 'estimator' to be a binary classifier."
check_is_fitted(estimator)
if not is_classifier(estimator):
raise ValueError(
classification_error + f" Got {estimator.__class__.__name__} instead."
)
elif len(estimator.classes_) != 2:
raise ValueError(
classification_error + f" Got {len(estimator.classes_)} classes instead."
)
if response_method == "auto":
response_method = ["predict_proba", "decision_function"]
return _get_response_values(
estimator,
X,
response_method,
pos_label=pos_label,
return_response_method_used=return_response_method_used,
)
@@ -0,0 +1,76 @@
{{py:
"""
Dataset abstractions for sequential data access.
Template file to easily generate fused-type-consistent code using Tempita
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
Generated file: _seq_dataset.pxd
Each class is duplicated for all dtypes (float and double). The keywords
between double braces are substituted in setup.py.
"""
# name_suffix, c_type
dtypes = [('64', 'float64_t'),
('32', 'float32_t')]
}}
"""Dataset abstractions for sequential data access."""
from ._typedefs cimport float32_t, float64_t, intp_t, uint32_t
# SequentialDataset and its two concrete subclasses are (optionally randomized)
# iterators over the rows of a matrix X and corresponding target values y.
{{for name_suffix, c_type in dtypes}}
#------------------------------------------------------------------------------
cdef class SequentialDataset{{name_suffix}}:
cdef int current_index
cdef int[::1] index
cdef int *index_data_ptr
cdef Py_ssize_t n_samples
cdef uint32_t seed
cdef void shuffle(self, uint32_t seed) noexcept nogil
cdef int _get_next_index(self) noexcept nogil
cdef int _get_random_index(self) noexcept nogil
cdef void _sample(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight,
int current_index) noexcept nogil
cdef void next(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil
cdef int random(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil
cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef const {{c_type}}[:, ::1] X
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef Py_ssize_t n_features
cdef intp_t X_stride
cdef {{c_type}} *X_data_ptr
cdef {{c_type}} *Y_data_ptr
cdef const int[::1] feature_indices
cdef int *feature_indices_ptr
cdef {{c_type}} *sample_weight_data
cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef const {{c_type}}[::1] X_data
cdef const int[::1] X_indptr
cdef const int[::1] X_indices
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef {{c_type}} *X_data_ptr
cdef int *X_indptr_ptr
cdef int *X_indices_ptr
cdef {{c_type}} *Y_data_ptr
cdef {{c_type}} *sample_weight_data
{{endfor}}
@@ -0,0 +1,351 @@
{{py:
"""
Dataset abstractions for sequential data access.
Template file to easily generate fused-type-consistent code using Tempita
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
Generated file: _seq_dataset.pyx
Each class is duplicated for all dtypes (float and double). The keywords
between double braces are substituted in setup.py.
Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
Arthur Imbert <arthurimbert05@gmail.com>
Joan Massich <mailsik@gmail.com>
License: BSD 3 clause
"""
# name_suffix, c_type, np_type
dtypes = [('64', 'float64_t', 'np.float64'),
('32', 'float32_t', 'np.float32')]
}}
"""Dataset abstractions for sequential data access."""
import numpy as np
cimport cython
from libc.limits cimport INT_MAX
from ._random cimport our_rand_r
from ._typedefs cimport float32_t, float64_t, uint32_t
{{for name_suffix, c_type, np_type in dtypes}}
#------------------------------------------------------------------------------
cdef class SequentialDataset{{name_suffix}}:
"""Base class for datasets with sequential data access.
SequentialDataset is used to iterate over the rows of a matrix X and
corresponding target values y, i.e. to iterate over samples.
There are two methods to get the next sample:
- next : Iterate sequentially (optionally randomized)
- random : Iterate randomly (with replacement)
Attributes
----------
index : np.ndarray
Index array for fast shuffling.
index_data_ptr : int
Pointer to the index array.
current_index : int
Index of current sample in ``index``.
The index of current sample in the data is given by
index_data_ptr[current_index].
n_samples : Py_ssize_t
Number of samples in the dataset.
seed : uint32_t
Seed used for random sampling. This attribute is modified at each call to the
`random` method.
"""
cdef void next(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil:
"""Get the next example ``x`` from the dataset.
This method gets the next sample looping sequentially over all samples.
The order can be shuffled with the method ``shuffle``.
Shuffling once before iterating over all samples corresponds to a
random draw without replacement. It is used for instance in SGD solver.
Parameters
----------
x_data_ptr : {{c_type}}**
A pointer to the {{c_type}} array which holds the feature
values of the next example.
x_ind_ptr : np.intc**
A pointer to the int array which holds the feature
indices of the next example.
nnz : int*
A pointer to an int holding the number of non-zero
values of the next example.
y : {{c_type}}*
The target value of the next example.
sample_weight : {{c_type}}*
The weight of the next example.
"""
cdef int current_index = self._get_next_index()
self._sample(x_data_ptr, x_ind_ptr, nnz, y, sample_weight,
current_index)
cdef int random(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil:
"""Get a random example ``x`` from the dataset.
This method gets the next sample chosen randomly over a uniform
distribution. It corresponds to a random draw with replacement.
It is used for instance in SAG solver.
Parameters
----------
x_data_ptr : {{c_type}}**
A pointer to the {{c_type}} array which holds the feature
values of the next example.
x_ind_ptr : np.intc**
A pointer to the int array which holds the feature
indices of the next example.
nnz : int*
A pointer to an int holding the number of non-zero
values of the next example.
y : {{c_type}}*
The target value of the next example.
sample_weight : {{c_type}}*
The weight of the next example.
Returns
-------
current_index : int
Index of current sample.
"""
cdef int current_index = self._get_random_index()
self._sample(x_data_ptr, x_ind_ptr, nnz, y, sample_weight,
current_index)
return current_index
cdef void shuffle(self, uint32_t seed) noexcept nogil:
"""Permutes the ordering of examples."""
# Fisher-Yates shuffle
cdef int *ind = self.index_data_ptr
cdef int n = self.n_samples
cdef unsigned i, j
for i in range(n - 1):
j = i + our_rand_r(&seed) % (n - i)
ind[i], ind[j] = ind[j], ind[i]
cdef int _get_next_index(self) noexcept nogil:
cdef int current_index = self.current_index
if current_index >= (self.n_samples - 1):
current_index = -1
current_index += 1
self.current_index = current_index
return self.current_index
cdef int _get_random_index(self) noexcept nogil:
cdef int n = self.n_samples
cdef int current_index = our_rand_r(&self.seed) % n
self.current_index = current_index
return current_index
cdef void _sample(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight,
int current_index) noexcept nogil:
pass
def _shuffle_py(self, uint32_t seed):
"""python function used for easy testing"""
self.shuffle(seed)
def _next_py(self):
"""python function used for easy testing"""
cdef int current_index = self._get_next_index()
return self._sample_py(current_index)
def _random_py(self):
"""python function used for easy testing"""
cdef int current_index = self._get_random_index()
return self._sample_py(current_index)
def _sample_py(self, int current_index):
"""python function used for easy testing"""
cdef {{c_type}}* x_data_ptr
cdef int* x_indices_ptr
cdef int nnz, j
cdef {{c_type}} y, sample_weight
# call _sample in cython
self._sample(&x_data_ptr, &x_indices_ptr, &nnz, &y, &sample_weight,
current_index)
# transform the pointed data in numpy CSR array
cdef {{c_type}}[:] x_data = np.empty(nnz, dtype={{np_type}})
cdef int[:] x_indices = np.empty(nnz, dtype=np.int32)
cdef int[:] x_indptr = np.asarray([0, nnz], dtype=np.int32)
for j in range(nnz):
x_data[j] = x_data_ptr[j]
x_indices[j] = x_indices_ptr[j]
cdef int sample_idx = self.index_data_ptr[current_index]
return (
(np.asarray(x_data), np.asarray(x_indices), np.asarray(x_indptr)),
y,
sample_weight,
sample_idx,
)
cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
"""Dataset backed by a two-dimensional numpy array.
The dtype of the numpy array is expected to be ``{{np_type}}`` ({{c_type}})
with a C-style memory layout.
"""
def __cinit__(
self,
const {{c_type}}[:, ::1] X,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
uint32_t seed=1,
):
"""A ``SequentialDataset`` backed by a two-dimensional numpy array.
Parameters
----------
X : ndarray, dtype={{c_type}}, ndim=2, mode='c'
The sample array, of shape (n_samples, n_features)
Y : ndarray, dtype={{c_type}}, ndim=1, mode='c'
The target array, of shape (n_samples,)
sample_weights : ndarray, dtype={{c_type}}, ndim=1, mode='c'
The weight of each sample, of shape (n_samples,)
"""
if X.shape[0] > INT_MAX or X.shape[1] > INT_MAX:
raise ValueError("More than %d samples or features not supported;"
" got (%d, %d)."
% (INT_MAX, X.shape[0], X.shape[1]))
# keep a reference to the data to prevent garbage collection
self.X = X
self.Y = Y
self.sample_weights = sample_weights
self.n_samples = X.shape[0]
self.n_features = X.shape[1]
self.feature_indices = np.arange(0, self.n_features, dtype=np.intc)
self.feature_indices_ptr = <int *> &self.feature_indices[0]
self.current_index = -1
self.X_stride = X.strides[0] // X.itemsize
self.X_data_ptr = <{{c_type}} *> &X[0, 0]
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]
# Use index array for fast shuffling
self.index = np.arange(0, self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)
cdef void _sample(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight,
int current_index) noexcept nogil:
cdef long long sample_idx = self.index_data_ptr[current_index]
cdef long long offset = sample_idx * self.X_stride
y[0] = self.Y_data_ptr[sample_idx]
x_data_ptr[0] = self.X_data_ptr + offset
x_ind_ptr[0] = self.feature_indices_ptr
nnz[0] = self.n_features
sample_weight[0] = self.sample_weight_data[sample_idx]
cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
"""A ``SequentialDataset`` backed by a scipy sparse CSR matrix. """
def __cinit__(
self,
const {{c_type}}[::1] X_data,
const int[::1] X_indptr,
const int[::1] X_indices,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
uint32_t seed=1,
):
"""Dataset backed by a scipy sparse CSR matrix.
The feature indices of ``x`` are given by x_ind_ptr[0:nnz].
The corresponding feature values are given by
x_data_ptr[0:nnz].
Parameters
----------
X_data : ndarray, dtype={{c_type}}, ndim=1, mode='c'
The data array of the CSR features matrix.
X_indptr : ndarray, dtype=np.intc, ndim=1, mode='c'
The index pointer array of the CSR features matrix.
X_indices : ndarray, dtype=np.intc, ndim=1, mode='c'
The column indices array of the CSR features matrix.
Y : ndarray, dtype={{c_type}}, ndim=1, mode='c'
The target values.
sample_weights : ndarray, dtype={{c_type}}, ndim=1, mode='c'
The weight of each sample.
"""
# keep a reference to the data to prevent garbage collection
self.X_data = X_data
self.X_indptr = X_indptr
self.X_indices = X_indices
self.Y = Y
self.sample_weights = sample_weights
self.n_samples = Y.shape[0]
self.current_index = -1
self.X_data_ptr = <{{c_type}} *> &X_data[0]
self.X_indptr_ptr = <int *> &X_indptr[0]
self.X_indices_ptr = <int *> &X_indices[0]
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]
# Use index array for fast shuffling
self.index = np.arange(self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)
cdef void _sample(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
int *nnz, {{c_type}} *y, {{c_type}} *sample_weight,
int current_index) noexcept nogil:
cdef long long sample_idx = self.index_data_ptr[current_index]
cdef long long offset = self.X_indptr_ptr[sample_idx]
y[0] = self.Y_data_ptr[sample_idx]
x_data_ptr[0] = self.X_data_ptr + offset
x_ind_ptr[0] = self.X_indices_ptr + offset
nnz[0] = self.X_indptr_ptr[sample_idx + 1] - offset
sample_weight[0] = self.sample_weight_data[sample_idx]
{{endfor}}
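# Illustrative usage sketch relying on the private Python testing helpers defined
# above (the SGD/SAG solvers themselves use the nogil C-level API):
#
#     >>> import numpy as np
#     >>> from sklearn.utils._seq_dataset import ArrayDataset64
#     >>> X = np.ascontiguousarray(np.random.rand(5, 3))
#     >>> y = np.arange(5, dtype=np.float64)
#     >>> sw = np.ones(5, dtype=np.float64)
#     >>> dataset = ArrayDataset64(X, y, sw, seed=42)
#     >>> (x_data, x_indices, x_indptr), target, weight, idx = dataset._next_py()
#     >>> int(idx)  # first sample visited in sequential (unshuffled) order
#     0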
@@ -0,0 +1,459 @@
import importlib
from functools import wraps
from typing import Protocol, runtime_checkable
import numpy as np
from scipy.sparse import issparse
from .._config import get_config
from ._available_if import available_if
def check_library_installed(library):
"""Check library is installed."""
try:
return importlib.import_module(library)
except ImportError as exc:
raise ImportError(
f"Setting output container to '{library}' requires {library} to be"
" installed"
) from exc
def get_columns(columns):
if callable(columns):
try:
return columns()
except Exception:
return None
return columns
@runtime_checkable
class ContainerAdapterProtocol(Protocol):
container_lib: str
def create_container(self, X_output, X_original, columns, inplace=False):
"""Create container from `X_output` with additional metadata.
Parameters
----------
X_output : {ndarray, dataframe}
Data to wrap.
X_original : {ndarray, dataframe}
Original input dataframe. This is used to extract the metadata that should
be passed to `X_output`, e.g. pandas row index.
columns : callable, ndarray, or None
The column names or a callable that returns the column names. The
callable is useful if the column names require some computation. If `None`,
then no columns are passed to the container's constructor.
inplace : bool, default=False
Whether or not we intend to modify `X_output` in-place. However, it does
not guarantee that we return the same object if the in-place operation
is not possible.
Returns
-------
wrapped_output : container_type
`X_output` wrapped into the container type.
"""
def is_supported_container(self, X):
"""Return True if X is a supported container.
Parameters
----------
X : container
Container to be checked.
Returns
-------
is_supported_container : bool
True if X is a supported container.
"""
def rename_columns(self, X, columns):
"""Rename columns in `X`.
Parameters
----------
X : container
Container whose columns are updated.
columns : ndarray of str
Columns to update the `X`'s columns with.
Returns
-------
updated_container : container
Container with new names.
"""
def hstack(self, Xs):
"""Stack containers horizontally (column-wise).
Parameters
----------
Xs : list of containers
List of containers to stack.
Returns
-------
stacked_Xs : container
Stacked containers.
"""
class PandasAdapter:
container_lib = "pandas"
def create_container(self, X_output, X_original, columns, inplace=True):
pd = check_library_installed("pandas")
columns = get_columns(columns)
if not inplace or not isinstance(X_output, pd.DataFrame):
# In all these cases, we need to create a new DataFrame
# Unfortunately, we cannot use `getattr(container, "index")`
# because `list` exposes an `index` attribute.
if isinstance(X_output, pd.DataFrame):
index = X_output.index
elif isinstance(X_original, pd.DataFrame):
index = X_original.index
else:
index = None
# We don't pass columns here because it would imply column selection
# instead of renaming.
X_output = pd.DataFrame(X_output, index=index, copy=not inplace)
if columns is not None:
return self.rename_columns(X_output, columns)
return X_output
def is_supported_container(self, X):
pd = check_library_installed("pandas")
return isinstance(X, pd.DataFrame)
def rename_columns(self, X, columns):
# we cannot use `rename` since it takes a dictionary and at this stage we have
# potentially duplicate column names in `X`
X.columns = columns
return X
def hstack(self, Xs):
pd = check_library_installed("pandas")
return pd.concat(Xs, axis=1)
class PolarsAdapter:
container_lib = "polars"
def create_container(self, X_output, X_original, columns, inplace=True):
pl = check_library_installed("polars")
columns = get_columns(columns)
columns = columns.tolist() if isinstance(columns, np.ndarray) else columns
if not inplace or not isinstance(X_output, pl.DataFrame):
# In all these cases, we need to create a new DataFrame
return pl.DataFrame(X_output, schema=columns, orient="row")
if columns is not None:
return self.rename_columns(X_output, columns)
return X_output
def is_supported_container(self, X):
pl = check_library_installed("polars")
return isinstance(X, pl.DataFrame)
def rename_columns(self, X, columns):
# we cannot use `rename` since it takes a dictionary and at this stage we have
# potentially duplicate column names in `X`
X.columns = columns
return X
def hstack(self, Xs):
pl = check_library_installed("polars")
return pl.concat(Xs, how="horizontal")
class ContainerAdaptersManager:
def __init__(self):
self.adapters = {}
@property
def supported_outputs(self):
return {"default"} | set(self.adapters)
def register(self, adapter):
self.adapters[adapter.container_lib] = adapter
ADAPTERS_MANAGER = ContainerAdaptersManager()
ADAPTERS_MANAGER.register(PandasAdapter())
ADAPTERS_MANAGER.register(PolarsAdapter())
def _get_adapter_from_container(container):
"""Get the adapter that knows how to handle such container.
See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more
details.
"""
module_name = container.__class__.__module__.split(".")[0]
try:
return ADAPTERS_MANAGER.adapters[module_name]
except KeyError as exc:
available_adapters = list(ADAPTERS_MANAGER.adapters.keys())
raise ValueError(
"The container does not have a registered adapter in scikit-learn. "
f"Available adapters are: {available_adapters} while the container "
f"provided is: {container!r}."
) from exc
def _get_container_adapter(method, estimator=None):
"""Get container adapter."""
dense_config = _get_output_config(method, estimator)["dense"]
try:
return ADAPTERS_MANAGER.adapters[dense_config]
except KeyError:
return None
def _get_output_config(method, estimator=None):
"""Get output config based on estimator and global configuration.
Parameters
----------
method : {"transform"}
Estimator's method for which the output container is looked up.
estimator : estimator instance or None
Estimator to get the output configuration from. If `None`, the global
configuration is used.
Returns
-------
config : dict
Dictionary with keys:
- "dense": specifies the dense container for `method`. This can be
`"default"` or `"pandas"`.
"""
est_sklearn_output_config = getattr(estimator, "_sklearn_output_config", {})
if method in est_sklearn_output_config:
dense_config = est_sklearn_output_config[method]
else:
dense_config = get_config()[f"{method}_output"]
supported_outputs = ADAPTERS_MANAGER.supported_outputs
if dense_config not in supported_outputs:
raise ValueError(
f"output config must be in {sorted(supported_outputs)}, got {dense_config}"
)
return {"dense": dense_config}
def _wrap_data_with_container(method, data_to_wrap, original_input, estimator):
"""Wrap output with container based on an estimator's or global config.
Parameters
----------
method : {"transform"}
Estimator's method to get container output for.
data_to_wrap : {ndarray, dataframe}
Data to wrap with container.
original_input : {ndarray, dataframe}
Original input of function.
estimator : estimator instance
Estimator to get the output configuration from.
Returns
-------
output : {ndarray, dataframe}
If the output config is "default" or the estimator is not configured
for wrapping return `data_to_wrap` unchanged.
If the output config is "pandas", return `data_to_wrap` as a pandas
DataFrame.
"""
output_config = _get_output_config(method, estimator)
if output_config["dense"] == "default" or not _auto_wrap_is_configured(estimator):
return data_to_wrap
dense_config = output_config["dense"]
if issparse(data_to_wrap):
raise ValueError(
"The transformer outputs a scipy sparse matrix. "
"Try to set the transformer output to a dense array or disable "
f"{dense_config.capitalize()} output with set_output(transform='default')."
)
adapter = ADAPTERS_MANAGER.adapters[dense_config]
return adapter.create_container(
data_to_wrap,
original_input,
columns=estimator.get_feature_names_out,
)
def _wrap_method_output(f, method):
"""Wrapper used by `_SetOutputMixin` to automatically wrap methods."""
@wraps(f)
def wrapped(self, X, *args, **kwargs):
data_to_wrap = f(self, X, *args, **kwargs)
if isinstance(data_to_wrap, tuple):
# only wrap the first output for cross decomposition
return_tuple = (
_wrap_data_with_container(method, data_to_wrap[0], X, self),
*data_to_wrap[1:],
)
# Support for namedtuples: `_make` is a documented API for namedtuples:
# https://docs.python.org/3/library/collections.html#collections.somenamedtuple._make
if hasattr(type(data_to_wrap), "_make"):
return type(data_to_wrap)._make(return_tuple)
return return_tuple
return _wrap_data_with_container(method, data_to_wrap, X, self)
return wrapped
def _auto_wrap_is_configured(estimator):
"""Return True if estimator is configured for auto-wrapping the transform method.
`_SetOutputMixin` sets `_sklearn_auto_wrap_output_keys` to `set()` if auto wrapping
is manually disabled.
"""
auto_wrap_output_keys = getattr(estimator, "_sklearn_auto_wrap_output_keys", set())
return (
hasattr(estimator, "get_feature_names_out")
and "transform" in auto_wrap_output_keys
)
class _SetOutputMixin:
"""Mixin that dynamically wraps methods to return container based on config.
Currently `_SetOutputMixin` wraps `transform` and `fit_transform` and configures
it based on `set_output` of the global configuration.
`set_output` is only defined if `get_feature_names_out` is defined and
`auto_wrap_output_keys` is the default value.
"""
def __init_subclass__(cls, auto_wrap_output_keys=("transform",), **kwargs):
super().__init_subclass__(**kwargs)
# Dynamically wraps `transform` and `fit_transform` and configures their
# output based on `set_output`.
if not (
isinstance(auto_wrap_output_keys, tuple) or auto_wrap_output_keys is None
):
raise ValueError("auto_wrap_output_keys must be None or a tuple of keys.")
if auto_wrap_output_keys is None:
cls._sklearn_auto_wrap_output_keys = set()
return
# Mapping from method to key in configurations
method_to_key = {
"transform": "transform",
"fit_transform": "transform",
}
cls._sklearn_auto_wrap_output_keys = set()
for method, key in method_to_key.items():
if not hasattr(cls, method) or key not in auto_wrap_output_keys:
continue
cls._sklearn_auto_wrap_output_keys.add(key)
# Only wrap methods defined by cls itself
if method not in cls.__dict__:
continue
wrapped_method = _wrap_method_output(getattr(cls, method), key)
setattr(cls, method, wrapped_method)
@available_if(_auto_wrap_is_configured)
def set_output(self, *, transform=None):
"""Set output container.
See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`
for an example on how to use the API.
Parameters
----------
transform : {"default", "pandas", "polars"}, default=None
Configure output of `transform` and `fit_transform`.
- `"default"`: Default output format of a transformer
- `"pandas"`: DataFrame output
- `"polars"`: Polars output
- `None`: Transform configuration is unchanged
.. versionadded:: 1.4
`"polars"` option was added.
Returns
-------
self : estimator instance
Estimator instance.
"""
if transform is None:
return self
if not hasattr(self, "_sklearn_output_config"):
self._sklearn_output_config = {}
self._sklearn_output_config["transform"] = transform
return self
def _safe_set_output(estimator, *, transform=None):
"""Safely call estimator.set_output and error if it not available.
This is used by meta-estimators to set the output for child estimators.
Parameters
----------
estimator : estimator instance
Estimator instance.
transform : {"default", "pandas", "polars"}, default=None
Configure output of the following estimator's methods:
- `"transform"`
- `"fit_transform"`
If `None`, this operation is a no-op.
Returns
-------
estimator : estimator instance
Estimator instance.
"""
set_output_for_transform = (
hasattr(estimator, "transform")
or hasattr(estimator, "fit_transform")
and transform is not None
)
if not set_output_for_transform:
# If estimator can not transform, then `set_output` does not need to be
# called.
return
if not hasattr(estimator, "set_output"):
raise ValueError(
f"Unable to configure output for {estimator} because `set_output` "
"is not available."
)
return estimator.set_output(transform=transform)
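# Illustrative usage of the wrapping machinery above (scikit-learn transformers
# inherit from _SetOutputMixin through TransformerMixin):
#
#     >>> import pandas as pd
#     >>> from sklearn.preprocessing import StandardScaler
#     >>> X = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [0.0, 1.0, 0.0]})
#     >>> scaler = StandardScaler().set_output(transform="pandas")
#     >>> scaler.fit_transform(X).columns.tolist()
#     ['a', 'b']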
@@ -0,0 +1,114 @@
"""
Utility methods to print system info for debugging
adapted from :func:`pandas.show_versions`
"""
# License: BSD 3 clause
import platform
import sys
from threadpoolctl import threadpool_info
from .. import __version__
from ._openmp_helpers import _openmp_parallelism_enabled
def _get_sys_info():
"""System information
Returns
-------
sys_info : dict
system and Python version information
"""
python = sys.version.replace("\n", " ")
blob = [
("python", python),
("executable", sys.executable),
("machine", platform.platform()),
]
return dict(blob)
def _get_deps_info():
"""Overview of the installed version of main dependencies
This function does not import the modules to collect the version numbers
but instead relies on standard Python package metadata.
Returns
-------
deps_info: dict
version information on relevant Python libraries
"""
deps = [
"pip",
"setuptools",
"numpy",
"scipy",
"Cython",
"pandas",
"matplotlib",
"joblib",
"threadpoolctl",
]
deps_info = {
"sklearn": __version__,
}
from importlib.metadata import PackageNotFoundError, version
for modname in deps:
try:
deps_info[modname] = version(modname)
except PackageNotFoundError:
deps_info[modname] = None
return deps_info
def show_versions():
"""Print useful debugging information"
.. versionadded:: 0.20
Examples
--------
>>> from sklearn import show_versions
>>> show_versions() # doctest: +SKIP
"""
sys_info = _get_sys_info()
deps_info = _get_deps_info()
print("\nSystem:")
for k, stat in sys_info.items():
print("{k:>10}: {stat}".format(k=k, stat=stat))
print("\nPython dependencies:")
for k, stat in deps_info.items():
print("{k:>13}: {stat}".format(k=k, stat=stat))
print(
"\n{k}: {stat}".format(
k="Built with OpenMP", stat=_openmp_parallelism_enabled()
)
)
# show threadpoolctl results
threadpool_results = threadpool_info()
if threadpool_results:
print()
print("threadpoolctl info:")
for i, result in enumerate(threadpool_results):
for key, val in result.items():
print(f"{key:>15}: {val}")
if i != len(threadpool_results) - 1:
print()
@@ -0,0 +1,9 @@
from ._typedefs cimport intp_t
from cython cimport floating
cdef int simultaneous_sort(
floating *dist,
intp_t *idx,
intp_t size,
) noexcept nogil
@@ -0,0 +1,93 @@
from cython cimport floating
cdef inline void dual_swap(
floating* darr,
intp_t *iarr,
intp_t a,
intp_t b,
) noexcept nogil:
"""Swap the values at index a and b of both darr and iarr"""
cdef floating dtmp = darr[a]
darr[a] = darr[b]
darr[b] = dtmp
cdef intp_t itmp = iarr[a]
iarr[a] = iarr[b]
iarr[b] = itmp
cdef int simultaneous_sort(
floating* values,
intp_t* indices,
intp_t size,
) noexcept nogil:
"""
Perform a recursive quicksort on the values array so as to sort them in ascending order.
This simultaneously performs the swaps on both the values and the indices arrays.
The numpy equivalent is:
def simultaneous_sort(dist, idx):
i = np.argsort(dist)
return dist[i], idx[i]
Notes
-----
Arrays are manipulated via a pointer to their first element and their size
so as to ease the processing of dynamically allocated buffers.
"""
# TODO: In order to support discrete distance metrics, we need to have a
# simultaneous sort which breaks ties on indices when distances are identical.
# The best might be using a std::stable_sort and a Comparator which might need
# an Array of Structures (AoS) instead of the Structure of Arrays (SoA)
# currently used.
cdef:
intp_t pivot_idx, i, store_idx
floating pivot_val
# in the small-array case, do things efficiently
if size <= 1:
pass
elif size == 2:
if values[0] > values[1]:
dual_swap(values, indices, 0, 1)
elif size == 3:
if values[0] > values[1]:
dual_swap(values, indices, 0, 1)
if values[1] > values[2]:
dual_swap(values, indices, 1, 2)
if values[0] > values[1]:
dual_swap(values, indices, 0, 1)
else:
# Determine the pivot using the median-of-three rule.
# The smallest of the three is moved to the beginning of the array,
# the middle (the pivot value) is moved to the end, and the largest
# is moved to the pivot index.
pivot_idx = size // 2
if values[0] > values[size - 1]:
dual_swap(values, indices, 0, size - 1)
if values[size - 1] > values[pivot_idx]:
dual_swap(values, indices, size - 1, pivot_idx)
if values[0] > values[size - 1]:
dual_swap(values, indices, 0, size - 1)
pivot_val = values[size - 1]
# Partition indices about pivot. At the end of this operation,
# pivot_idx will contain the pivot value, everything to the left
# will be smaller, and everything to the right will be larger.
store_idx = 0
for i in range(size - 1):
if values[i] < pivot_val:
dual_swap(values, indices, i, store_idx)
store_idx += 1
dual_swap(values, indices, store_idx, size - 1)
pivot_idx = store_idx
# Recursively sort each side of the pivot
if pivot_idx > 1:
simultaneous_sort(values, indices, pivot_idx)
if pivot_idx + 2 < size:
simultaneous_sort(values + pivot_idx + 1,
indices + pivot_idx + 1,
size - pivot_idx - 1)
return 0
@@ -0,0 +1,68 @@
import numpy as np
_DEFAULT_TAGS = {
"array_api_support": False,
"non_deterministic": False,
"requires_positive_X": False,
"requires_positive_y": False,
"X_types": ["2darray"],
"poor_score": False,
"no_validation": False,
"multioutput": False,
"allow_nan": False,
"stateless": False,
"multilabel": False,
"_skip_test": False,
"_xfail_checks": False,
"multioutput_only": False,
"binary_only": False,
"requires_fit": True,
"preserves_dtype": [np.float64],
"requires_y": False,
"pairwise": False,
}
def _safe_tags(estimator, key=None):
"""Safely get estimator tags.
:class:`~sklearn.BaseEstimator` provides the estimator tags machinery.
However, if an estimator does not inherit from this base class, we should
fall-back to the default tags.
For scikit-learn built-in estimators, we should still rely on
`self._get_tags()`. `_safe_tags(est)` should be used when we are not sure
where `est` comes from: typically `_safe_tags(self.base_estimator)` where
`self` is a meta-estimator, or in the common checks.
Parameters
----------
estimator : estimator object
The estimator from which to get the tag.
key : str, default=None
Tag name to get. By default (`None`), all tags are returned.
Returns
-------
tags : dict or tag value
The estimator tags. A single value is returned if `key` is not None.
"""
if hasattr(estimator, "_get_tags"):
tags_provider = "_get_tags()"
tags = estimator._get_tags()
elif hasattr(estimator, "_more_tags"):
tags_provider = "_more_tags()"
tags = {**_DEFAULT_TAGS, **estimator._more_tags()}
else:
tags_provider = "_DEFAULT_TAGS"
tags = _DEFAULT_TAGS
if key is not None:
if key not in tags:
raise ValueError(
f"The key {key} is not defined in {tags_provider} for the "
f"class {estimator.__class__.__name__}."
)
return tags[key]
return tags
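# Illustrative usage (internal helper, shown for context only):
#
#     >>> from sklearn.linear_model import LogisticRegression
#     >>> _safe_tags(LogisticRegression(), key="requires_y")
#     True
#     >>> _safe_tags(LogisticRegression())["allow_nan"]
#     False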
File diff suppressed because it is too large
@@ -0,0 +1,41 @@
# Commonly used types
# These are redefinitions of the ones defined by numpy in
# https://github.com/numpy/numpy/blob/main/numpy/__init__.pxd.
# Eventually this will avoid having to always include the numpy headers, even when
# we only use them for the types.
#
# When used to declare variables that will receive values from numpy arrays, it
# should match the dtype of the array. For example, to declare a variable that will
# receive values from a numpy array of dtype np.float64, the type float64_t must be
# used.
#
# TODO: Stop defining custom types locally or globally like DTYPE_t and friends and
# use these consistently throughout the codebase.
# NOTE: Extend this list as needed when converting more cython extensions.
ctypedef unsigned char uint8_t
ctypedef unsigned int uint32_t
ctypedef unsigned long long uint64_t
# Note: In NumPy 2, indexing always happens with npy_intp which is an alias for
# the Py_ssize_t type, see PEP 353.
#
# Note that on most platforms Py_ssize_t is equivalent to C99's intptr_t,
# but they can differ on architectures with segmented memory (none
# supported by scikit-learn at the time of writing).
#
# intp_t/np.intp should be used to index arrays in a platform dependent way.
# Storing arrays with platform dependent dtypes as attribute on picklable
# objects is not recommended as it requires special care when loading and
# using such datastructures on a host with different bitness. Instead one
# should rather use fixed width integer types such as int32 or uint32 when we know
# that the number of elements to index is not larger than 2 or 4 billion.
ctypedef Py_ssize_t intp_t
ctypedef float float32_t
ctypedef double float64_t
# Sparse matrix indices and index pointer arrays must use int32_t over
# intp_t because intp_t is platform dependent.
# When large sparse matrices are supported, indexing must use int64_t.
# See https://github.com/scikit-learn/scikit-learn/issues/23653 which tracks the
# ongoing work to support large sparse matrices.
ctypedef signed char int8_t
ctypedef signed int int32_t
ctypedef signed long long int64_t
@@ -0,0 +1,23 @@
# _typedefs is a declaration only module
#
# The functions implemented here are for testing purpose only.
import numpy as np
ctypedef fused testing_type_t:
float32_t
float64_t
int8_t
int32_t
int64_t
intp_t
uint8_t
uint32_t
uint64_t
def testing_make_array_from_typed_val(testing_type_t val):
cdef testing_type_t[:] val_view = <testing_type_t[:1]>&val
return np.asarray(val_view)
@@ -0,0 +1,54 @@
import timeit
from contextlib import contextmanager
def _message_with_time(source, message, time):
"""Create one line message for logging purposes.
Parameters
----------
source : str
String indicating the source or the reference of the message.
message : str
Short message.
time : int
Time in seconds.
"""
start_message = "[%s] " % source
# adapted from joblib.logger.short_format_time without the Windows -.1s
# adjustment
if time > 60:
time_str = "%4.1fmin" % (time / 60)
else:
time_str = " %5.1fs" % time
end_message = " %s, total=%s" % (message, time_str)
dots_len = 70 - len(start_message) - len(end_message)
return "%s%s%s" % (start_message, dots_len * ".", end_message)
@contextmanager
def _print_elapsed_time(source, message=None):
"""Log elapsed time to stdout when the context is exited.
Parameters
----------
source : str
String indicating the source or the reference of the message.
message : str, default=None
Short message. If None, nothing will be printed.
Returns
-------
context_manager
Prints elapsed time upon exit if verbose.
"""
if message is None:
yield
else:
start = timeit.default_timer()
yield
print(_message_with_time(source, message, timeit.default_timer() - start))
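# Illustrative usage (the one-line summary printed on exit mirrors the verbose
# output of meta-estimators such as Pipeline):
#
#     import time
#     with _print_elapsed_time("Pipeline", "fitting step 1 of 2"):
#         time.sleep(0.1)
#     # prints something like:
#     # [Pipeline] ................... fitting step 1 of 2, total=   0.1s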
@@ -0,0 +1,12 @@
cimport numpy as cnp
from libcpp.vector cimport vector
from ..utils._typedefs cimport intp_t, float64_t, int32_t, int64_t
ctypedef fused vector_typed:
vector[float64_t]
vector[intp_t]
vector[int32_t]
vector[int64_t]
cdef cnp.ndarray vector_to_nd_array(vector_typed * vect_ptr)
@@ -0,0 +1,118 @@
from cython.operator cimport dereference as deref
from cpython.ref cimport Py_INCREF
cimport numpy as cnp
cnp.import_array()
cdef StdVectorSentinel _create_sentinel(vector_typed * vect_ptr):
if vector_typed is vector[float64_t]:
return StdVectorSentinelFloat64.create_for(vect_ptr)
elif vector_typed is vector[int32_t]:
return StdVectorSentinelInt32.create_for(vect_ptr)
elif vector_typed is vector[int64_t]:
return StdVectorSentinelInt64.create_for(vect_ptr)
else: # intp_t
return StdVectorSentinelIntP.create_for(vect_ptr)
cdef class StdVectorSentinel:
"""Wraps a reference to a vector which will be deallocated with this object.
When created, the StdVectorSentinel swaps the reference of its internal
vectors with the provided one (vect_ptr), thus making the StdVectorSentinel
manage the provided one's lifetime.
"""
cdef void* get_data(self):
"""Return pointer to data."""
cdef int get_typenum(self):
"""Get typenum for PyArray_SimpleNewFromData."""
cdef class StdVectorSentinelFloat64(StdVectorSentinel):
cdef vector[float64_t] vec
@staticmethod
cdef StdVectorSentinel create_for(vector[float64_t] * vect_ptr):
# This initializes the object directly without calling __init__
# See: https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#instantiation-from-existing-c-c-pointers # noqa
cdef StdVectorSentinelFloat64 sentinel = StdVectorSentinelFloat64.__new__(StdVectorSentinelFloat64)
sentinel.vec.swap(deref(vect_ptr))
return sentinel
cdef void* get_data(self):
return self.vec.data()
cdef int get_typenum(self):
return cnp.NPY_FLOAT64
cdef class StdVectorSentinelIntP(StdVectorSentinel):
cdef vector[intp_t] vec
@staticmethod
cdef StdVectorSentinel create_for(vector[intp_t] * vect_ptr):
# This initializes the object directly without calling __init__
# See: https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#instantiation-from-existing-c-c-pointers # noqa
cdef StdVectorSentinelIntP sentinel = StdVectorSentinelIntP.__new__(StdVectorSentinelIntP)
sentinel.vec.swap(deref(vect_ptr))
return sentinel
cdef void* get_data(self):
return self.vec.data()
cdef int get_typenum(self):
return cnp.NPY_INTP
cdef class StdVectorSentinelInt32(StdVectorSentinel):
cdef vector[int32_t] vec
@staticmethod
cdef StdVectorSentinel create_for(vector[int32_t] * vect_ptr):
# This initializes the object directly without calling __init__
# See: https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#instantiation-from-existing-c-c-pointers # noqa
cdef StdVectorSentinelInt32 sentinel = StdVectorSentinelInt32.__new__(StdVectorSentinelInt32)
sentinel.vec.swap(deref(vect_ptr))
return sentinel
cdef void* get_data(self):
return self.vec.data()
cdef int get_typenum(self):
return cnp.NPY_INT32
cdef class StdVectorSentinelInt64(StdVectorSentinel):
cdef vector[int64_t] vec
@staticmethod
cdef StdVectorSentinel create_for(vector[int64_t] * vect_ptr):
# This initializes the object directly without calling __init__
# See: https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#instantiation-from-existing-c-c-pointers # noqa
cdef StdVectorSentinelInt64 sentinel = StdVectorSentinelInt64.__new__(StdVectorSentinelInt64)
sentinel.vec.swap(deref(vect_ptr))
return sentinel
cdef void* get_data(self):
return self.vec.data()
cdef int get_typenum(self):
return cnp.NPY_INT64
cdef cnp.ndarray vector_to_nd_array(vector_typed * vect_ptr):
cdef:
cnp.npy_intp size = deref(vect_ptr).size()
StdVectorSentinel sentinel = _create_sentinel(vect_ptr)
cnp.ndarray arr = cnp.PyArray_SimpleNewFromData(
1, &size, sentinel.get_typenum(), sentinel.get_data())
# Makes the numpy array responsible of the life-cycle of its buffer.
# A reference to the StdVectorSentinel will be stolen by the call to
# `PyArray_SetBaseObject` below, so we increase its reference counter.
# See: https://docs.python.org/3/c-api/intro.html#reference-count-details
Py_INCREF(sentinel)
cnp.PyArray_SetBaseObject(arr, sentinel)
return arr
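# Illustrative usage from other Cython code (comment-only sketch; the calling
# module is hypothetical):
#
#     cdef vector[intp_t] indices
#     indices.push_back(3)
#     indices.push_back(7)
#     arr = vector_to_nd_array(&indices)   # 1d intp ndarray, zero copy
#
# After the call, the sentinel owns the vector's buffer (it was swapped into
# the sentinel), and the buffer is only freed when `arr` is garbage collected.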
@@ -0,0 +1,45 @@
{{py:
"""
Efficient (dense) parameter vector implementation for linear models.
Template file to easily generate fused-type-consistent code using Tempita
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
Generated file: _weight_vector.pxd
Each class is duplicated for all dtypes (float and double). The keywords
between double braces are substituted by Tempita at build time.
"""
# name_suffix, c_type
dtypes = [('64', 'double'),
('32', 'float')]
}}
{{for name_suffix, c_type in dtypes}}
cdef class WeightVector{{name_suffix}}(object):
cdef readonly {{c_type}}[::1] w
cdef readonly {{c_type}}[::1] aw
cdef {{c_type}} *w_data_ptr
cdef {{c_type}} *aw_data_ptr
cdef double wscale
cdef double average_a
cdef double average_b
cdef int n_features
cdef double sq_norm
cdef void add(self, {{c_type}} *x_data_ptr, int *x_ind_ptr,
int xnnz, {{c_type}} c) noexcept nogil
cdef void add_average(self, {{c_type}} *x_data_ptr, int *x_ind_ptr,
int xnnz, {{c_type}} c, {{c_type}} num_iter) noexcept nogil
cdef {{c_type}} dot(self, {{c_type}} *x_data_ptr, int *x_ind_ptr,
int xnnz) noexcept nogil
cdef void scale(self, {{c_type}} c) noexcept nogil
cdef void reset_wscale(self) noexcept nogil
cdef {{c_type}} norm(self) noexcept nogil
{{endfor}}
@@ -0,0 +1,210 @@
{{py:
"""
Efficient (dense) parameter vector implementation for linear models.
Template file to easily generate fused-type-consistent code using Tempita
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
Generated file: _weight_vector.pyx
Each class is duplicated for all dtypes (float and double). The keywords
between double braces are substituted by Tempita at build time.
"""
# name_suffix, c_type, reset_wscale_threshold
dtypes = [('64', 'double', 1e-9),
('32', 'float', 1e-6)]
}}
# cython: binding=False
#
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Lars Buitinck
# Danny Sullivan <dsullivan7@hotmail.com>
#
# License: BSD 3 clause
cimport cython
from libc.limits cimport INT_MAX
from libc.math cimport sqrt
from ._cython_blas cimport _dot, _scal, _axpy
{{for name_suffix, c_type, reset_wscale_threshold in dtypes}}
cdef class WeightVector{{name_suffix}}(object):
"""Dense vector represented by a scalar and a numpy array.
The class provides methods to ``add`` a sparse vector
and scale the vector.
Representing a vector explicitly as a scalar times a
vector allows for efficient scaling operations.
Attributes
----------
w : ndarray, dtype={{c_type}}, order='C'
The numpy array which backs the weight vector.
aw : ndarray, dtype={{c_type}}, order='C'
The numpy array which backs the average_weight vector.
w_data_ptr : {{c_type}}*
A pointer to the data of the numpy array.
wscale : {{c_type}}
The scale of the vector.
n_features : int
The number of features (= dimensionality of ``w``).
sq_norm : {{c_type}}
The squared norm of ``w``.
"""
def __cinit__(self,
{{c_type}}[::1] w,
{{c_type}}[::1] aw):
if w.shape[0] > INT_MAX:
raise ValueError("More than %d features not supported; got %d."
% (INT_MAX, w.shape[0]))
self.w = w
self.w_data_ptr = &w[0]
self.wscale = 1.0
self.n_features = w.shape[0]
self.sq_norm = _dot(self.n_features, self.w_data_ptr, 1, self.w_data_ptr, 1)
self.aw = aw
if self.aw is not None:
self.aw_data_ptr = &aw[0]
self.average_a = 0.0
self.average_b = 1.0
cdef void add(self, {{c_type}} *x_data_ptr, int *x_ind_ptr, int xnnz,
{{c_type}} c) noexcept nogil:
"""Scales sample x by constant c and adds it to the weight vector.
This operation updates ``sq_norm``.
Parameters
----------
x_data_ptr : {{c_type}}*
The array which holds the feature values of ``x``.
x_ind_ptr : np.intc*
The array which holds the feature indices of ``x``.
xnnz : int
The number of non-zero features of ``x``.
c : {{c_type}}
The scaling constant for the example.
"""
cdef int j
cdef int idx
cdef double val
cdef double innerprod = 0.0
cdef double xsqnorm = 0.0
# the next two lines save a factor of 2!
cdef {{c_type}} wscale = self.wscale
cdef {{c_type}}* w_data_ptr = self.w_data_ptr
for j in range(xnnz):
idx = x_ind_ptr[j]
val = x_data_ptr[j]
innerprod += (w_data_ptr[idx] * val)
xsqnorm += (val * val)
w_data_ptr[idx] += val * (c / wscale)
self.sq_norm += (xsqnorm * c * c) + (2.0 * innerprod * wscale * c)
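# Note on the update above (added for clarity): the represented vector is
# v = wscale * w, so adding c * x to v is done as w[idx] += x[idx] * (c / wscale).
# The squared norm then follows
#     ||v + c x||^2 = ||v||^2 + c^2 ||x||^2 + 2 c <v, x>
#                   = sq_norm + c^2 * xsqnorm + 2 * c * wscale * innerprod
# which is exactly the expression used for self.sq_norm.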
# Update the average weights according to the sparse trick defined
# here: https://research.microsoft.com/pubs/192769/tricks-2012.pdf
# by Leon Bottou
cdef void add_average(self, {{c_type}} *x_data_ptr, int *x_ind_ptr, int xnnz,
{{c_type}} c, {{c_type}} num_iter) noexcept nogil:
"""Updates the average weight vector.
Parameters
----------
x_data_ptr : {{c_type}}*
The array which holds the feature values of ``x``.
x_ind_ptr : np.intc*
The array which holds the feature indices of ``x``.
xnnz : int
The number of non-zero features of ``x``.
c : {{c_type}}
The scaling constant for the example.
num_iter : {{c_type}}
The total number of iterations.
"""
cdef int j
cdef int idx
cdef double val
cdef double mu = 1.0 / num_iter
cdef double average_a = self.average_a
cdef double wscale = self.wscale
cdef {{c_type}}* aw_data_ptr = self.aw_data_ptr
for j in range(xnnz):
idx = x_ind_ptr[j]
val = x_data_ptr[j]
aw_data_ptr[idx] += (self.average_a * val * (-c / wscale))
# Once the sample has been processed
# update the average_a and average_b
if num_iter > 1:
self.average_b /= (1.0 - mu)
self.average_a += mu * self.average_b * wscale
cdef {{c_type}} dot(self, {{c_type}} *x_data_ptr, int *x_ind_ptr,
int xnnz) noexcept nogil:
"""Computes the dot product of a sample x and the weight vector.
Parameters
----------
x_data_ptr : {{c_type}}*
The array which holds the feature values of ``x``.
x_ind_ptr : np.intc*
The array which holds the feature indices of ``x``.
xnnz : int
The number of non-zero features of ``x`` (length of x_ind_ptr).
Returns
-------
innerprod : {{c_type}}
The inner product of ``x`` and ``w``.
"""
cdef int j
cdef int idx
cdef double innerprod = 0.0
cdef {{c_type}}* w_data_ptr = self.w_data_ptr
for j in range(xnnz):
idx = x_ind_ptr[j]
innerprod += w_data_ptr[idx] * x_data_ptr[j]
innerprod *= self.wscale
return innerprod
cdef void scale(self, {{c_type}} c) noexcept nogil:
"""Scales the weight vector by a constant ``c``.
It updates ``wscale`` and ``sq_norm``. If ``wscale`` gets too
small we call ``reset_wscale``."""
self.wscale *= c
self.sq_norm *= (c * c)
if self.wscale < {{reset_wscale_threshold}}:
self.reset_wscale()
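# Note on the lazy scaling trick (added for clarity): since the represented
# vector is v = wscale * w, scale() only touches two scalars (wscale and
# sq_norm) instead of all n_features entries; e.g. after two calls to
# scale(0.5), wscale == 0.25 and dot() still returns 0.25 * <w, x>. Only when
# wscale drifts below the threshold does reset_wscale() fold the factor back
# into w at O(n_features) cost to preserve numerical precision.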
cdef void reset_wscale(self) noexcept nogil:
"""Scales each coef of ``w`` by ``wscale`` and resets it to 1. """
if self.aw_data_ptr != NULL:
_axpy(self.n_features, self.average_a,
self.w_data_ptr, 1, self.aw_data_ptr, 1)
_scal(self.n_features, 1.0 / self.average_b, self.aw_data_ptr, 1)
self.average_a = 0.0
self.average_b = 1.0
_scal(self.n_features, self.wscale, self.w_data_ptr, 1)
self.wscale = 1.0
cdef {{c_type}} norm(self) noexcept nogil:
"""The L2 norm of the weight vector. """
return sqrt(self.sq_norm)
{{endfor}}
@@ -0,0 +1,137 @@
"""
The :mod:`sklearn.utils.arrayfuncs` module includes a small collection of auxiliary
functions that operate on arrays.
"""
from cython cimport floating
from cython.parallel cimport prange
from libc.math cimport fabs
from libc.float cimport DBL_MAX, FLT_MAX
from ._cython_blas cimport _copy, _rotg, _rot
from ._typedefs cimport float64_t
ctypedef fused real_numeric:
short
int
long
long long
float
double
def min_pos(const floating[:] X):
"""Find the minimum value of an array over positive values.
Returns the maximum representable value of the input dtype if none of the
values are positive.
Parameters
----------
X : ndarray of shape (n,)
Input array.
Returns
-------
min_val : float
The smallest positive value in the array, or the maximum representable value
of the input dtype if no positive values are found.
Examples
--------
>>> import numpy as np
>>> from sklearn.utils.arrayfuncs import min_pos
>>> X = np.array([0, -1, 2, 3, -4, 5])
>>> min_pos(X)
2.0
"""
cdef Py_ssize_t i
cdef floating min_val = FLT_MAX if floating is float else DBL_MAX
for i in range(X.size):
if 0. < X[i] < min_val:
min_val = X[i]
return min_val
def _all_with_any_reduction_axis_1(real_numeric[:, :] array, real_numeric value):
"""Check whether any row contains all values equal to `value`.
It is equivalent to `np.any(np.all(X == value, axis=1))`, but it avoids
materializing the temporary boolean matrices in memory.
Parameters
----------
array: array-like
The array to be checked.
value: short, int, long, float, or double
The value to use for the comparison.
Returns
-------
any_all_equal: bool
Whether or not any rows contains all values equal to `value`.
"""
cdef Py_ssize_t i, j
for i in range(array.shape[0]):
for j in range(array.shape[1]):
if array[i, j] != value:
break
else: # no break
return True
return False
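# Illustrative sketch (comment only): with array = np.array([[1, 1], [1, 2]])
# and value = 1, the inner loop over the first row never hits `break`, so the
# for/else clause returns True, matching np.any(np.all(array == value, axis=1)).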
# General Cholesky Delete.
# Remove an element from the cholesky factorization
# m = columns
# n = rows
#
# TODO: put transpose as an option
def cholesky_delete(floating[:, :] L, int go_out):
cdef:
int n = L.shape[0]
int m = L.strides[0]
floating c, s
floating *L1
int i
if floating is float:
m /= sizeof(float)
else:
m /= sizeof(double)
# delete row go_out
L1 = &L[0, 0] + (go_out * m)
for i in range(go_out, n-1):
_copy(i + 2, L1 + m, 1, L1, 1)
L1 += m
L1 = &L[0, 0] + (go_out * m)
for i in range(go_out, n-1):
_rotg(L1 + i, L1 + i + 1, &c, &s)
if L1[i] < 0:
# Diagonals cannot be negative
L1[i] = fabs(L1[i])
c = -c
s = -s
L1[i + 1] = 0. # just for cleanup
L1 += m
_rot(n - i - 2, L1 + i, m, L1 + i + 1, m, c, s)
def sum_parallel(const floating [:] array, int n_threads):
"""Parallel sum, always using float64 internally."""
cdef:
float64_t out = 0.
int i = 0
for i in prange(
array.shape[0], schedule='static', nogil=True, num_threads=n_threads
):
out += array[i]
return out
@@ -0,0 +1,224 @@
"""
The :mod:`sklearn.utils.class_weight` module includes utilities for handling
weights based on class labels.
"""
# Authors: Andreas Mueller
# Manoj Kumar
# License: BSD 3 clause
import numpy as np
from scipy import sparse
from ._param_validation import StrOptions, validate_params
@validate_params(
{
"class_weight": [dict, StrOptions({"balanced"}), None],
"classes": [np.ndarray],
"y": ["array-like"],
},
prefer_skip_nested_validation=True,
)
def compute_class_weight(class_weight, *, classes, y):
"""Estimate class weights for unbalanced datasets.
Parameters
----------
class_weight : dict, "balanced" or None
If "balanced", class weights will be given by
`n_samples / (n_classes * np.bincount(y))`.
If a dictionary is given, keys are classes and values are corresponding class
weights.
If `None` is given, the class weights will be uniform.
classes : ndarray
Array of the classes occurring in the data, as given by
`np.unique(y_org)` with `y_org` the original class labels.
y : array-like of shape (n_samples,)
Array of original class labels per sample.
Returns
-------
class_weight_vect : ndarray of shape (n_classes,)
Array with `class_weight_vect[i]` the weight for i-th class.
References
----------
The "balanced" heuristic is inspired by
Logistic Regression in Rare Events Data, King, Zeng, 2001.
Examples
--------
>>> import numpy as np
>>> from sklearn.utils.class_weight import compute_class_weight
>>> y = [1, 1, 1, 1, 0, 0]
>>> compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y)
array([1.5 , 0.75])
"""
# Import error caused by circular imports.
from ..preprocessing import LabelEncoder
if set(y) - set(classes):
raise ValueError("classes should include all valid labels that can be in y")
if class_weight is None or len(class_weight) == 0:
# uniform class weights
weight = np.ones(classes.shape[0], dtype=np.float64, order="C")
elif class_weight == "balanced":
# Find the weight of each class as present in y.
le = LabelEncoder()
y_ind = le.fit_transform(y)
if not all(np.isin(classes, le.classes_)):
raise ValueError("classes should have valid labels that are in y")
recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))
weight = recip_freq[le.transform(classes)]
else:
# user-defined dictionary
weight = np.ones(classes.shape[0], dtype=np.float64, order="C")
unweighted_classes = []
for i, c in enumerate(classes):
if c in class_weight:
weight[i] = class_weight[c]
else:
unweighted_classes.append(c)
n_weighted_classes = len(classes) - len(unweighted_classes)
if unweighted_classes and n_weighted_classes != len(class_weight):
unweighted_classes_user_friendly_str = np.array(unweighted_classes).tolist()
raise ValueError(
f"The classes, {unweighted_classes_user_friendly_str}, are not in"
" class_weight"
)
return weight
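# Worked example for the "balanced" heuristic (consistent with the doctest
# above): y = [1, 1, 1, 1, 0, 0] gives n_samples=6, n_classes=2 and class
# counts [2, 4] for classes [0, 1], so the weights are
#     6 / (2 * 2) = 1.5   for class 0
#     6 / (2 * 4) = 0.75  for class 1
# i.e. array([1.5, 0.75]).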
@validate_params(
{
"class_weight": [dict, list, StrOptions({"balanced"}), None],
"y": ["array-like", "sparse matrix"],
"indices": ["array-like", None],
},
prefer_skip_nested_validation=True,
)
def compute_sample_weight(class_weight, y, *, indices=None):
"""Estimate sample weights by class for unbalanced datasets.
Parameters
----------
class_weight : dict, list of dicts, "balanced", or None
Weights associated with classes in the form `{class_label: weight}`.
If not given, all classes are supposed to have weight one. For
multi-output problems, a list of dicts can be provided in the same
order as the columns of y.
Note that for multioutput (including multilabel) weights should be
defined for each class of every column in its own dict. For example,
for four-class multilabel classification weights should be
`[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}]` instead of
`[{1:1}, {2:5}, {3:1}, {4:1}]`.
The `"balanced"` mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data:
`n_samples / (n_classes * np.bincount(y))`.
For multi-output, the weights of each column of y will be multiplied.
y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)
Array of original class labels per sample.
indices : array-like of shape (n_subsample,), default=None
Array of indices to be used in a subsample. Can be of length less than
`n_samples` in the case of a subsample, or equal to `n_samples` in the
case of a bootstrap subsample with repeated indices. If `None`, the
sample weight will be calculated over the full sample. Only `"balanced"`
is supported for `class_weight` if this is provided.
Returns
-------
sample_weight_vect : ndarray of shape (n_samples,)
Array with sample weights as applied to the original `y`.
Examples
--------
>>> from sklearn.utils.class_weight import compute_sample_weight
>>> y = [1, 1, 1, 1, 0, 0]
>>> compute_sample_weight(class_weight="balanced", y=y)
array([0.75, 0.75, 0.75, 0.75, 1.5 , 1.5 ])
"""
# Ensure y is 2D. Sparse matrices are already 2D.
if not sparse.issparse(y):
y = np.atleast_1d(y)
if y.ndim == 1:
y = np.reshape(y, (-1, 1))
n_outputs = y.shape[1]
if indices is not None and class_weight != "balanced":
raise ValueError(
"The only valid class_weight for subsampling is 'balanced'. "
f"Given {class_weight}."
)
elif n_outputs > 1:
if class_weight is None or isinstance(class_weight, dict):
raise ValueError(
"For multi-output, class_weight should be a list of dicts, or the "
"string 'balanced'."
)
elif isinstance(class_weight, list) and len(class_weight) != n_outputs:
raise ValueError(
"For multi-output, number of elements in class_weight should match "
f"number of outputs. Got {len(class_weight)} element(s) while having "
f"{n_outputs} outputs."
)
expanded_class_weight = []
for k in range(n_outputs):
if sparse.issparse(y):
# Ok to densify a single column at a time
y_full = y[:, [k]].toarray().flatten()
else:
y_full = y[:, k]
classes_full = np.unique(y_full)
classes_missing = None
if class_weight == "balanced" or n_outputs == 1:
class_weight_k = class_weight
else:
class_weight_k = class_weight[k]
if indices is not None:
# Get class weights for the subsample, covering all classes in
# case some labels that were present in the original data are
# missing from the sample.
y_subsample = y_full[indices]
classes_subsample = np.unique(y_subsample)
weight_k = np.take(
compute_class_weight(
class_weight_k, classes=classes_subsample, y=y_subsample
),
np.searchsorted(classes_subsample, classes_full),
mode="clip",
)
classes_missing = set(classes_full) - set(classes_subsample)
else:
weight_k = compute_class_weight(
class_weight_k, classes=classes_full, y=y_full
)
weight_k = weight_k[np.searchsorted(classes_full, y_full)]
if classes_missing:
# Make missing classes' weight zero
weight_k[np.isin(y_full, list(classes_missing))] = 0.0
expanded_class_weight.append(weight_k)
expanded_class_weight = np.prod(expanded_class_weight, axis=0, dtype=np.float64)
return expanded_class_weight
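# Worked example (consistent with the doctest above): with
# class_weight="balanced" and y = [1, 1, 1, 1, 0, 0], compute_class_weight
# returns [1.5, 0.75] for classes [0, 1]; mapping each sample to the weight of
# its class gives [0.75, 0.75, 0.75, 0.75, 1.5, 1.5]. For multi-output y, the
# per-output weights computed this way are multiplied element-wise (np.prod
# over outputs).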
@@ -0,0 +1,135 @@
import functools
import warnings
__all__ = ["deprecated"]
class deprecated:
"""Decorator to mark a function or class as deprecated.
Issue a warning when the function is called/the class is instantiated and
add a warning to the docstring.
The optional extra argument will be appended to the deprecation message
and the docstring. Note: to use this with the default value for extra, put
in an empty pair of parentheses:
Examples
--------
>>> from sklearn.utils import deprecated
>>> deprecated()
<sklearn.utils.deprecation.deprecated object at ...>
>>> @deprecated()
... def some_function(): pass
Parameters
----------
extra : str, default=''
To be added to the deprecation messages.
"""
# Adapted from https://wiki.python.org/moin/PythonDecoratorLibrary,
# but with many changes.
def __init__(self, extra=""):
self.extra = extra
def __call__(self, obj):
"""Call method
Parameters
----------
obj : object
"""
if isinstance(obj, type):
return self._decorate_class(obj)
elif isinstance(obj, property):
# Note that this is only triggered properly if the `deprecated`
# decorator is placed before the `property` decorator, like so:
#
# @deprecated(msg)
# @property
# def deprecated_attribute_(self):
# ...
return self._decorate_property(obj)
else:
return self._decorate_fun(obj)
def _decorate_class(self, cls):
msg = "Class %s is deprecated" % cls.__name__
if self.extra:
msg += "; %s" % self.extra
new = cls.__new__
def wrapped(cls, *args, **kwargs):
warnings.warn(msg, category=FutureWarning)
if new is object.__new__:
return object.__new__(cls)
return new(cls, *args, **kwargs)
cls.__new__ = wrapped
wrapped.__name__ = "__new__"
wrapped.deprecated_original = new
return cls
def _decorate_fun(self, fun):
"""Decorate function fun"""
msg = "Function %s is deprecated" % fun.__name__
if self.extra:
msg += "; %s" % self.extra
@functools.wraps(fun)
def wrapped(*args, **kwargs):
warnings.warn(msg, category=FutureWarning)
return fun(*args, **kwargs)
# Add a reference to the wrapped function so that we can introspect
# on function arguments in Python 2 (already works in Python 3)
wrapped.__wrapped__ = fun
return wrapped
def _decorate_property(self, prop):
msg = self.extra
@property
@functools.wraps(prop)
def wrapped(*args, **kwargs):
warnings.warn(msg, category=FutureWarning)
return prop.fget(*args, **kwargs)
return wrapped
def _is_deprecated(func):
"""Helper to check if func is wrapped by our deprecated decorator"""
closures = getattr(func, "__closure__", [])
if closures is None:
closures = []
is_deprecated = "deprecated" in "".join(
[c.cell_contents for c in closures if isinstance(c.cell_contents, str)]
)
return is_deprecated
# TODO: remove in 1.7
def _deprecate_Xt_in_inverse_transform(X, Xt):
"""Helper to deprecate the `Xt` argument in favor of `X` in inverse_transform."""
if X is not None and Xt is not None:
raise TypeError("Cannot use both X and Xt. Use X only.")
if X is None and Xt is None:
raise TypeError("Missing required positional argument: X.")
if Xt is not None:
warnings.warn(
"Xt was renamed X in version 1.5 and will be removed in 1.7.",
FutureWarning,
)
return Xt
return X
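# Illustrative usage sketch (hypothetical estimator method, not part of this
# module): a transformer that still accepts the deprecated keyword would
# typically start its method with
#
#     def inverse_transform(self, X=None, *, Xt=None):
#         X = _deprecate_Xt_in_inverse_transform(X, Xt)
#         ...
#
# so that `est.inverse_transform(Xt=data)` keeps working but emits a
# FutureWarning, while passing both X and Xt raises a TypeError.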
@@ -0,0 +1,265 @@
"""
The :mod:`sklearn.utils.discovery` module includes utilities to discover
objects (i.e. estimators, displays, functions) from the `sklearn` package.
"""
import inspect
import pkgutil
from importlib import import_module
from operator import itemgetter
from pathlib import Path
_MODULE_TO_IGNORE = {
"tests",
"externals",
"setup",
"conftest",
"experimental",
"estimator_checks",
}
def all_estimators(type_filter=None):
"""Get a list of all estimators from `sklearn`.
This function crawls the module and gets all classes that inherit
from BaseEstimator. Classes that are defined in test-modules are not
included.
Parameters
----------
type_filter : {"classifier", "regressor", "cluster", "transformer"} \
or list of such str, default=None
Which kind of estimators should be returned. If None, no filter is
applied and all estimators are returned. Possible values are
'classifier', 'regressor', 'cluster' and 'transformer' to get
estimators only of these specific types, or a list of these to
get the estimators that fit at least one of the types.
Returns
-------
estimators : list of tuples
List of (name, class), where ``name`` is the class name as string
and ``class`` is the actual type of the class.
Examples
--------
>>> from sklearn.utils.discovery import all_estimators
>>> estimators = all_estimators()
>>> type(estimators)
<class 'list'>
>>> type(estimators[0])
<class 'tuple'>
>>> estimators[:2]
[('ARDRegression', <class 'sklearn.linear_model._bayes.ARDRegression'>),
('AdaBoostClassifier',
<class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>)]
>>> classifiers = all_estimators(type_filter="classifier")
>>> classifiers[:2]
[('AdaBoostClassifier',
<class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>),
('BaggingClassifier', <class 'sklearn.ensemble._bagging.BaggingClassifier'>)]
>>> regressors = all_estimators(type_filter="regressor")
>>> regressors[:2]
[('ARDRegression', <class 'sklearn.linear_model._bayes.ARDRegression'>),
('AdaBoostRegressor',
<class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>)]
>>> both = all_estimators(type_filter=["classifier", "regressor"])
>>> both[:2]
[('ARDRegression', <class 'sklearn.linear_model._bayes.ARDRegression'>),
('AdaBoostClassifier',
<class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>)]
"""
# lazy import to avoid circular imports from sklearn.base
from ..base import (
BaseEstimator,
ClassifierMixin,
ClusterMixin,
RegressorMixin,
TransformerMixin,
)
from ._testing import ignore_warnings
from .fixes import _IS_PYPY
def is_abstract(c):
if not (hasattr(c, "__abstractmethods__")):
return False
if not len(c.__abstractmethods__):
return False
return True
all_classes = []
root = str(Path(__file__).parent.parent) # sklearn package
# Ignore deprecation warnings triggered at import time and from walking
# packages
with ignore_warnings(category=FutureWarning):
for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix="sklearn."):
module_parts = module_name.split(".")
if (
any(part in _MODULE_TO_IGNORE for part in module_parts)
or "._" in module_name
):
continue
module = import_module(module_name)
classes = inspect.getmembers(module, inspect.isclass)
classes = [
(name, est_cls) for name, est_cls in classes if not name.startswith("_")
]
# TODO: Remove when FeatureHasher is implemented in PYPY
# Skips FeatureHasher for PYPY
if _IS_PYPY and "feature_extraction" in module_name:
classes = [
(name, est_cls)
for name, est_cls in classes
if name != "FeatureHasher"
]
all_classes.extend(classes)
all_classes = set(all_classes)
estimators = [
c
for c in all_classes
if (issubclass(c[1], BaseEstimator) and c[0] != "BaseEstimator")
]
# get rid of abstract base classes
estimators = [c for c in estimators if not is_abstract(c[1])]
if type_filter is not None:
if not isinstance(type_filter, list):
type_filter = [type_filter]
else:
type_filter = list(type_filter) # copy
filtered_estimators = []
filters = {
"classifier": ClassifierMixin,
"regressor": RegressorMixin,
"transformer": TransformerMixin,
"cluster": ClusterMixin,
}
for name, mixin in filters.items():
if name in type_filter:
type_filter.remove(name)
filtered_estimators.extend(
[est for est in estimators if issubclass(est[1], mixin)]
)
estimators = filtered_estimators
if type_filter:
raise ValueError(
"Parameter type_filter must be 'classifier', "
"'regressor', 'transformer', 'cluster' or "
"None, got"
f" {repr(type_filter)}."
)
# drop duplicates, sort for reproducibility
# itemgetter is used to ensure the sort does not extend to the 2nd item of
# the tuple
return sorted(set(estimators), key=itemgetter(0))
def all_displays():
"""Get a list of all displays from `sklearn`.
Returns
-------
displays : list of tuples
List of (name, class), where ``name`` is the display class name as
string and ``class`` is the actual type of the class.
Examples
--------
>>> from sklearn.utils.discovery import all_displays
>>> displays = all_displays()
>>> displays[0]
('CalibrationDisplay', <class 'sklearn.calibration.CalibrationDisplay'>)
"""
# lazy import to avoid circular imports from sklearn.base
from ._testing import ignore_warnings
all_classes = []
root = str(Path(__file__).parent.parent) # sklearn package
# Ignore deprecation warnings triggered at import time and from walking
# packages
with ignore_warnings(category=FutureWarning):
for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix="sklearn."):
module_parts = module_name.split(".")
if (
any(part in _MODULE_TO_IGNORE for part in module_parts)
or "._" in module_name
):
continue
module = import_module(module_name)
classes = inspect.getmembers(module, inspect.isclass)
classes = [
(name, display_class)
for name, display_class in classes
if not name.startswith("_") and name.endswith("Display")
]
all_classes.extend(classes)
return sorted(set(all_classes), key=itemgetter(0))
def _is_checked_function(item):
if not inspect.isfunction(item):
return False
if item.__name__.startswith("_"):
return False
mod = item.__module__
if not mod.startswith("sklearn.") or mod.endswith("estimator_checks"):
return False
return True
def all_functions():
"""Get a list of all functions from `sklearn`.
Returns
-------
functions : list of tuples
List of (name, function), where ``name`` is the function name as
string and ``function`` is the actual function.
Examples
--------
>>> from sklearn.utils.discovery import all_functions
>>> functions = all_functions()
>>> name, function = functions[0]
>>> name
'accuracy_score'
"""
# lazy import to avoid circular imports from sklearn.base
from ._testing import ignore_warnings
all_functions = []
root = str(Path(__file__).parent.parent) # sklearn package
# Ignore deprecation warnings triggered at import time and from walking
# packages
with ignore_warnings(category=FutureWarning):
for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix="sklearn."):
module_parts = module_name.split(".")
if (
any(part in _MODULE_TO_IGNORE for part in module_parts)
or "._" in module_name
):
continue
module = import_module(module_name)
functions = inspect.getmembers(module, _is_checked_function)
functions = [
(func.__name__, func)
for name, func in functions
if not name.startswith("_")
]
all_functions.extend(functions)
# drop duplicates, sort for reproducibility
# itemgetter is used to ensure the sort does not extend to the 2nd item of
# the tuple
return sorted(set(all_functions), key=itemgetter(0))
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,417 @@
"""Compatibility fixes for older version of python, numpy and scipy
If you add content to this file, please give the version of the package
at which the fix is no longer needed.
"""
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Fabian Pedregosa <fpedregosa@acm.org>
# Lars Buitinck
#
# License: BSD 3 clause
import platform
import struct
import numpy as np
import scipy
import scipy.sparse.linalg
import scipy.stats
import sklearn
from ..externals._packaging.version import parse as parse_version
_IS_PYPY = platform.python_implementation() == "PyPy"
_IS_32BIT = 8 * struct.calcsize("P") == 32
_IS_WASM = platform.machine() in ["wasm32", "wasm64"]
np_version = parse_version(np.__version__)
np_base_version = parse_version(np_version.base_version)
sp_version = parse_version(scipy.__version__)
sp_base_version = parse_version(sp_version.base_version)
# TODO: We can consider removing the containers and importing
# directly from SciPy when sparse matrices are deprecated.
CSR_CONTAINERS = [scipy.sparse.csr_matrix]
CSC_CONTAINERS = [scipy.sparse.csc_matrix]
COO_CONTAINERS = [scipy.sparse.coo_matrix]
LIL_CONTAINERS = [scipy.sparse.lil_matrix]
DOK_CONTAINERS = [scipy.sparse.dok_matrix]
BSR_CONTAINERS = [scipy.sparse.bsr_matrix]
DIA_CONTAINERS = [scipy.sparse.dia_matrix]
if parse_version(scipy.__version__) >= parse_version("1.8"):
# Sparse Arrays have been added in SciPy 1.8
# TODO: When SciPy 1.8 is the minimum supported version,
# those lists can be created directly without this condition.
# See: https://github.com/scikit-learn/scikit-learn/issues/27090
CSR_CONTAINERS.append(scipy.sparse.csr_array)
CSC_CONTAINERS.append(scipy.sparse.csc_array)
COO_CONTAINERS.append(scipy.sparse.coo_array)
LIL_CONTAINERS.append(scipy.sparse.lil_array)
DOK_CONTAINERS.append(scipy.sparse.dok_array)
BSR_CONTAINERS.append(scipy.sparse.bsr_array)
DIA_CONTAINERS.append(scipy.sparse.dia_array)
# Remove when minimum scipy version is 1.11.0
try:
from scipy.sparse import sparray # noqa
SPARRAY_PRESENT = True
except ImportError:
SPARRAY_PRESENT = False
# Remove when minimum scipy version is 1.8
try:
from scipy.sparse import csr_array # noqa
SPARSE_ARRAY_PRESENT = True
except ImportError:
SPARSE_ARRAY_PRESENT = False
try:
from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2
except ImportError: # SciPy < 1.8
from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa
def _object_dtype_isnan(X):
return X != X
# Rename the `method` kwarg to `interpolation` for NumPy < 1.22, because
# `interpolation` kwarg was deprecated in favor of `method` in NumPy >= 1.22.
def _percentile(a, q, *, method="linear", **kwargs):
return np.percentile(a, q, interpolation=method, **kwargs)
if np_version < parse_version("1.22"):
percentile = _percentile
else: # >= 1.22
from numpy import percentile # type: ignore # noqa
# TODO: Remove when SciPy 1.11 is the minimum supported version
def _mode(a, axis=0):
if sp_version >= parse_version("1.9.0"):
mode = scipy.stats.mode(a, axis=axis, keepdims=True)
if sp_version >= parse_version("1.10.999"):
# scipy.stats.mode has changed returned array shape with axis=None
# and keepdims=True, see https://github.com/scipy/scipy/pull/17561
if axis is None:
mode = np.ravel(mode)
return mode
return scipy.stats.mode(a, axis=axis)
# TODO: Remove when Scipy 1.12 is the minimum supported version
if sp_base_version >= parse_version("1.12.0"):
_sparse_linalg_cg = scipy.sparse.linalg.cg
else:
def _sparse_linalg_cg(A, b, **kwargs):
if "rtol" in kwargs:
kwargs["tol"] = kwargs.pop("rtol")
if "atol" not in kwargs:
kwargs["atol"] = "legacy"
return scipy.sparse.linalg.cg(A, b, **kwargs)
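# Illustrative sketch (comment only): callers use the SciPy >= 1.12 keyword
# names, e.g.
#     x, info = _sparse_linalg_cg(A, b, rtol=1e-6)
# and on older SciPy the wrapper above renames rtol -> tol and defaults
# atol="legacy" to keep the historical stopping criterion.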
# TODO: Fuse the modern implementations of _sparse_min_max and _sparse_nan_min_max
# into the public min_max_axis function when Scipy 1.11 is the minimum supported
# version and delete the backport in the else branch below.
if sp_base_version >= parse_version("1.11.0"):
def _sparse_min_max(X, axis):
the_min = X.min(axis=axis)
the_max = X.max(axis=axis)
if axis is not None:
the_min = the_min.toarray().ravel()
the_max = the_max.toarray().ravel()
return the_min, the_max
def _sparse_nan_min_max(X, axis):
the_min = X.nanmin(axis=axis)
the_max = X.nanmax(axis=axis)
if axis is not None:
the_min = the_min.toarray().ravel()
the_max = the_max.toarray().ravel()
return the_min, the_max
else:
# This code is mostly taken from scipy 0.14 and extended to handle nans, see
# https://github.com/scikit-learn/scikit-learn/pull/11196
def _minor_reduce(X, ufunc):
major_index = np.flatnonzero(np.diff(X.indptr))
# reduceat tries to cast X.indptr to intp, which errors
# if it is int64 on a 32 bit system.
# Reinitializing prevents this where possible, see #13737
X = type(X)((X.data, X.indices, X.indptr), shape=X.shape)
value = ufunc.reduceat(X.data, X.indptr[major_index])
return major_index, value
def _min_or_max_axis(X, axis, min_or_max):
N = X.shape[axis]
if N == 0:
raise ValueError("zero-size array to reduction operation")
M = X.shape[1 - axis]
mat = X.tocsc() if axis == 0 else X.tocsr()
mat.sum_duplicates()
major_index, value = _minor_reduce(mat, min_or_max)
not_full = np.diff(mat.indptr)[major_index] < N
value[not_full] = min_or_max(value[not_full], 0)
mask = value != 0
major_index = np.compress(mask, major_index)
value = np.compress(mask, value)
if axis == 0:
res = scipy.sparse.coo_matrix(
(value, (np.zeros(len(value)), major_index)),
dtype=X.dtype,
shape=(1, M),
)
else:
res = scipy.sparse.coo_matrix(
(value, (major_index, np.zeros(len(value)))),
dtype=X.dtype,
shape=(M, 1),
)
return res.A.ravel()
def _sparse_min_or_max(X, axis, min_or_max):
if axis is None:
if 0 in X.shape:
raise ValueError("zero-size array to reduction operation")
zero = X.dtype.type(0)
if X.nnz == 0:
return zero
m = min_or_max.reduce(X.data.ravel())
if X.nnz != np.prod(X.shape):
m = min_or_max(zero, m)
return m
if axis < 0:
axis += 2
if (axis == 0) or (axis == 1):
return _min_or_max_axis(X, axis, min_or_max)
else:
raise ValueError("invalid axis, use 0 for rows, or 1 for columns")
def _sparse_min_max(X, axis):
return (
_sparse_min_or_max(X, axis, np.minimum),
_sparse_min_or_max(X, axis, np.maximum),
)
def _sparse_nan_min_max(X, axis):
return (
_sparse_min_or_max(X, axis, np.fmin),
_sparse_min_or_max(X, axis, np.fmax),
)
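# Illustrative note on the backport above (comment only): implicit zeros take
# part in the reduction whenever the matrix is not completely filled, e.g. a
# 2x2 CSR matrix storing only the values [2., 3.] has nnz != np.prod(shape),
# so _sparse_min_max(X, axis=None) returns (0.0, 3.0) rather than (2.0, 3.0).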
# For NumPy >= 1.25, exceptions and warnings are moved to a dedicated
# submodule.
if np_version >= parse_version("1.25.0"):
from numpy.exceptions import ComplexWarning, VisibleDeprecationWarning
else:
from numpy import ComplexWarning, VisibleDeprecationWarning # type: ignore # noqa
# TODO: Remove when Scipy 1.6 is the minimum supported version
try:
from scipy.integrate import trapezoid # type: ignore # noqa
except ImportError:
from scipy.integrate import trapz as trapezoid # type: ignore # noqa
# TODO: Adapt when Pandas > 2.2 is the minimum supported version
def pd_fillna(pd, frame):
pd_version = parse_version(pd.__version__).base_version
if parse_version(pd_version) < parse_version("2.2"):
frame = frame.fillna(value=np.nan)
else:
infer_objects_kwargs = (
{} if parse_version(pd_version) >= parse_version("3") else {"copy": False}
)
with pd.option_context("future.no_silent_downcasting", True):
frame = frame.fillna(value=np.nan).infer_objects(**infer_objects_kwargs)
return frame
# TODO: remove when SciPy 1.12 is the minimum supported version
def _preserve_dia_indices_dtype(
sparse_container, original_container_format, requested_sparse_format
):
"""Preserve indices dtype for SciPy < 1.12 when converting from DIA to CSR/CSC.
For SciPy < 1.12, DIA arrays indices are upcasted to `np.int64` that is
inconsistent with DIA matrices. We downcast the indices dtype to `np.int32` to
be consistent with DIA matrices.
The converted indices arrays are affected back inplace to the sparse container.
Parameters
----------
sparse_container : sparse container
Sparse container to be checked.
requested_sparse_format : str or bool
The type of format of `sparse_container`.
Notes
-----
See https://github.com/scipy/scipy/issues/19245 for more details.
"""
if original_container_format == "dia_array" and requested_sparse_format in (
"csr",
"coo",
):
if requested_sparse_format == "csr":
index_dtype = _smallest_admissible_index_dtype(
arrays=(sparse_container.indptr, sparse_container.indices),
maxval=max(sparse_container.nnz, sparse_container.shape[1]),
check_contents=True,
)
sparse_container.indices = sparse_container.indices.astype(
index_dtype, copy=False
)
sparse_container.indptr = sparse_container.indptr.astype(
index_dtype, copy=False
)
else: # requested_sparse_format == "coo"
index_dtype = _smallest_admissible_index_dtype(
maxval=max(sparse_container.shape)
)
sparse_container.row = sparse_container.row.astype(index_dtype, copy=False)
sparse_container.col = sparse_container.col.astype(index_dtype, copy=False)
# TODO: remove when SciPy 1.12 is the minimum supported version
def _smallest_admissible_index_dtype(arrays=(), maxval=None, check_contents=False):
"""Based on input (integer) arrays `a`, determine a suitable index data
type that can hold the data in the arrays.
This function returns `np.int64` if it either required by `maxval` or based on the
largest precision of the dtype of the arrays passed as argument, or by the their
contents (when `check_contents is True`). If none of the condition requires
`np.int64` then this function returns `np.int32`.
Parameters
----------
arrays : ndarray or tuple of ndarrays, default=()
Input arrays whose types/contents to check.
maxval : float, default=None
Maximum value needed.
check_contents : bool, default=False
Whether to check the values in the arrays and not just their types.
By default, check only the types.
Returns
-------
dtype : {np.int32, np.int64}
Suitable index data type (int32 or int64).
"""
int32min = np.int32(np.iinfo(np.int32).min)
int32max = np.int32(np.iinfo(np.int32).max)
if maxval is not None:
if maxval > np.iinfo(np.int64).max:
raise ValueError(
f"maxval={maxval} is to large to be represented as np.int64."
)
if maxval > int32max:
return np.int64
if isinstance(arrays, np.ndarray):
arrays = (arrays,)
for arr in arrays:
if not isinstance(arr, np.ndarray):
raise TypeError(
f"Arrays should be of type np.ndarray, got {type(arr)} instead."
)
if not np.issubdtype(arr.dtype, np.integer):
raise ValueError(
f"Array dtype {arr.dtype} is not supported for index dtype. We expect "
"integral values."
)
if not np.can_cast(arr.dtype, np.int32):
if not check_contents:
# when `check_contents` is False, we stay on the safe side and return
# np.int64.
return np.int64
if arr.size == 0:
# a bigger type not needed yet, let's look at the next array
continue
else:
maxval = arr.max()
minval = arr.min()
if minval < int32min or maxval > int32max:
# a big index type is actually needed
return np.int64
return np.int32
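# Illustrative sketch (comment only): an int64 array whose values fit in int32
# can still be downcast when check_contents=True, e.g.
#     _smallest_admissible_index_dtype(np.array([0, 5], dtype=np.int64),
#                                      check_contents=True)
# returns np.int32, whereas maxval=2**35 (above the int32 maximum) forces
# np.int64 regardless of the arrays.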
# TODO: Remove when Scipy 1.12 is the minimum supported version
if sp_version < parse_version("1.12"):
from ..externals._scipy.sparse.csgraph import laplacian # type: ignore # noqa
else:
from scipy.sparse.csgraph import laplacian # type: ignore # noqa # pragma: no cover
# TODO: Remove when we drop support for Python 3.9. Note the filter argument has
# been back-ported in 3.9.17 but we can not assume anything about the micro
# version, see
# https://docs.python.org/3.9/library/tarfile.html#tarfile.TarFile.extractall
# for more details
def tarfile_extractall(tarfile, path):
try:
tarfile.extractall(path, filter="data")
except TypeError:
tarfile.extractall(path)
def _in_unstable_openblas_configuration():
"""Return True if in an unstable configuration for OpenBLAS"""
# Import libraries which might load OpenBLAS.
import numpy # noqa
import scipy # noqa
modules_info = sklearn._threadpool_controller.info()
open_blas_used = any(info["internal_api"] == "openblas" for info in modules_info)
if not open_blas_used:
return False
# OpenBLAS 0.3.16 fixed instability for arm64, see:
# https://github.com/xianyi/OpenBLAS/blob/1b6db3dbba672b4f8af935bd43a1ff6cff4d20b7/Changelog.txt#L56-L58 # noqa
openblas_arm64_stable_version = parse_version("0.3.16")
for info in modules_info:
if info["internal_api"] != "openblas":
continue
openblas_version = info.get("version")
openblas_architecture = info.get("architecture")
if openblas_version is None or openblas_architecture is None:
# Cannot be sure that OpenBLAS is good enough. Assume unstable:
return True # pragma: no cover
if (
openblas_architecture == "neoversen1"
and parse_version(openblas_version) < openblas_arm64_stable_version
):
# See discussions in https://github.com/numpy/numpy/issues/19411
return True # pragma: no cover
return False
@@ -0,0 +1,166 @@
"""
The :mod:`sklearn.utils.graph` module includes graph utilities and algorithms.
"""
# Authors: Aric Hagberg <hagberg@lanl.gov>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Jake Vanderplas <vanderplas@astro.washington.edu>
# License: BSD 3 clause
import numpy as np
from scipy import sparse
from ..metrics.pairwise import pairwise_distances
from ._param_validation import Integral, Interval, validate_params
###############################################################################
# Path and connected component analysis.
# Code adapted from networkx
@validate_params(
{
"graph": ["array-like", "sparse matrix"],
"source": [Interval(Integral, 0, None, closed="left")],
"cutoff": [Interval(Integral, 0, None, closed="left"), None],
},
prefer_skip_nested_validation=True,
)
def single_source_shortest_path_length(graph, source, *, cutoff=None):
"""Return the length of the shortest path from source to all reachable nodes.
Parameters
----------
graph : {array-like, sparse matrix} of shape (n_nodes, n_nodes)
Adjacency matrix of the graph. Sparse matrix of format LIL is
preferred.
source : int
Start node for path.
cutoff : int, default=None
Depth to stop the search - only paths of length <= cutoff are returned.
Returns
-------
paths : dict
Reachable end nodes mapped to length of path from source,
i.e. `{end: path_length}`.
Examples
--------
>>> from sklearn.utils.graph import single_source_shortest_path_length
>>> import numpy as np
>>> graph = np.array([[ 0, 1, 0, 0],
... [ 1, 0, 1, 0],
... [ 0, 1, 0, 0],
... [ 0, 0, 0, 0]])
>>> single_source_shortest_path_length(graph, 0)
{0: 0, 1: 1, 2: 2}
>>> graph = np.ones((6, 6))
>>> sorted(single_source_shortest_path_length(graph, 2).items())
[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]
"""
if sparse.issparse(graph):
graph = graph.tolil()
else:
graph = sparse.lil_matrix(graph)
seen = {} # level (number of hops) when seen in BFS
level = 0 # the current level
next_level = [source] # nodes to check at next level
while next_level:
this_level = next_level # advance to next level
next_level = set() # and start a new set (fringe)
for v in this_level:
if v not in seen:
seen[v] = level # set the level of vertex v
next_level.update(graph.rows[v])
if cutoff is not None and cutoff <= level:
break
level += 1
return seen # return all path lengths as dictionary
def _fix_connected_components(
X,
graph,
n_connected_components,
component_labels,
mode="distance",
metric="euclidean",
**kwargs,
):
"""Add connections to sparse graph to connect unconnected components.
For each pair of unconnected components, compute all pairwise distances
from one component to the other, and add a connection on the closest pair
of samples. This is a hacky way to get a graph with a single connected
component, which is necessary for example to compute a shortest path
between all pairs of samples in the graph.
Parameters
----------
X : array of shape (n_samples, n_features) or (n_samples, n_samples)
Features to compute the pairwise distances. If `metric =
"precomputed"`, X is the matrix of pairwise distances.
graph : sparse matrix of shape (n_samples, n_samples)
Graph of connection between samples.
n_connected_components : int
Number of connected components, as computed by
`scipy.sparse.csgraph.connected_components`.
component_labels : array of shape (n_samples)
Labels of connected components, as computed by
`scipy.sparse.csgraph.connected_components`.
mode : {'connectivity', 'distance'}, default='distance'
Type of graph matrix: 'connectivity' corresponds to the connectivity
matrix with ones and zeros, and 'distance' corresponds to the distances
between neighbors according to the given metric.
metric : str
Metric used in `sklearn.metrics.pairwise.pairwise_distances`.
kwargs : kwargs
Keyword arguments passed to
`sklearn.metrics.pairwise.pairwise_distances`.
Returns
-------
graph : sparse matrix of shape (n_samples, n_samples)
Graph of connection between samples, with a single connected component.
"""
if metric == "precomputed" and sparse.issparse(X):
raise RuntimeError(
"_fix_connected_components with metric='precomputed' requires the "
"full distance matrix in X, and does not work with a sparse "
"neighbors graph."
)
for i in range(n_connected_components):
idx_i = np.flatnonzero(component_labels == i)
Xi = X[idx_i]
for j in range(i):
idx_j = np.flatnonzero(component_labels == j)
Xj = X[idx_j]
if metric == "precomputed":
D = X[np.ix_(idx_i, idx_j)]
else:
D = pairwise_distances(Xi, Xj, metric=metric, **kwargs)
ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)
if mode == "connectivity":
graph[idx_i[ii], idx_j[jj]] = 1
graph[idx_j[jj], idx_i[ii]] = 1
elif mode == "distance":
graph[idx_i[ii], idx_j[jj]] = D[ii, jj]
graph[idx_j[jj], idx_i[ii]] = D[ii, jj]
else:
raise ValueError(
"Unknown mode=%r, should be one of ['connectivity', 'distance']."
% mode
)
return graph
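# Illustrative sketch (comment only, hypothetical inputs): with
# component_labels = np.array([0, 0, 1, 1]), the loops compare component 0
# (samples 0, 1) against component 1 (samples 2, 3), locate the closest pair
# (ii, jj) from pairwise_distances (or from X itself when metric="precomputed"),
# and add a single symmetric entry to `graph` for that pair, after which
# scipy.sparse.csgraph.connected_components reports one component.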
@@ -0,0 +1,74 @@
# utils is cimported from other subpackages so this is needed for the cimport
# to work
utils_cython_tree = [
# We add sklearn_root_cython_tree to make sure sklearn/__init__.py is copied
# early in the build
sklearn_root_cython_tree,
fs.copyfile('__init__.py'),
fs.copyfile('_cython_blas.pxd'),
fs.copyfile('_heap.pxd'),
fs.copyfile('_openmp_helpers.pxd'),
fs.copyfile('_random.pxd'),
fs.copyfile('_sorting.pxd'),
fs.copyfile('_typedefs.pxd'),
fs.copyfile('_vector_sentinel.pxd'),
]
utils_extension_metadata = {
'sparsefuncs_fast':
{'sources': ['sparsefuncs_fast.pyx']},
'_cython_blas': {'sources': ['_cython_blas.pyx']},
'arrayfuncs': {'sources': ['arrayfuncs.pyx']},
'murmurhash': {
'sources': ['murmurhash.pyx', 'src' / 'MurmurHash3.cpp'],
},
'_fast_dict':
{'sources': ['_fast_dict.pyx'], 'override_options': ['cython_language=cpp']},
'_openmp_helpers': {'sources': ['_openmp_helpers.pyx'], 'dependencies': [openmp_dep]},
'_random': {'sources': ['_random.pyx']},
'_typedefs': {'sources': ['_typedefs.pyx']},
'_heap': {'sources': ['_heap.pyx']},
'_sorting': {'sources': ['_sorting.pyx']},
'_vector_sentinel':
{'sources': ['_vector_sentinel.pyx'], 'override_options': ['cython_language=cpp'],
'dependencies': [np_dep]},
'_isfinite': {'sources': ['_isfinite.pyx']},
}
foreach ext_name, ext_dict : utils_extension_metadata
py.extension_module(
ext_name,
[ext_dict.get('sources'), utils_cython_tree],
dependencies: ext_dict.get('dependencies', []),
override_options : ext_dict.get('override_options', []),
cython_args: cython_args,
subdir: 'sklearn/utils',
install: true
)
endforeach
util_extension_names = ['_seq_dataset', '_weight_vector']
foreach name: util_extension_names
pxd = custom_target(
name + '_pxd',
output: name + '.pxd',
input: name + '.pxd.tp',
command: [py, tempita, '@INPUT@', '-o', '@OUTDIR@'],
)
utils_cython_tree += [pxd]
pyx = custom_target(
name + '_pyx',
output: name + '.pyx',
input: name + '.pyx.tp',
command: [py, tempita, '@INPUT@', '-o', '@OUTDIR@']
)
py.extension_module(
name,
[pxd, pyx, utils_cython_tree],
cython_args: cython_args,
subdir: 'sklearn/utils',
install: true
)
endforeach
@@ -0,0 +1,22 @@
"""
The :mod:`sklearn.utils.metadata_routing` module includes utilities to route
metadata within scikit-learn estimators.
"""
# This module is not a separate sub-folder since that would result in a circular
# import issue.
#
# Author: Adrin Jalali <adrin.jalali@gmail.com>
# License: BSD 3 clause
from ._metadata_requests import WARN, UNUSED, UNCHANGED # noqa
from ._metadata_requests import get_routing_for_object # noqa
from ._metadata_requests import MetadataRouter # noqa
from ._metadata_requests import MetadataRequest # noqa
from ._metadata_requests import MethodMapping # noqa
from ._metadata_requests import process_routing # noqa
from ._metadata_requests import _MetadataRequester # noqa
from ._metadata_requests import _routing_enabled # noqa
from ._metadata_requests import _raise_for_params # noqa
from ._metadata_requests import _RoutingNotSupportedMixin # noqa
from ._metadata_requests import _raise_for_unsupported_routing # noqa
@@ -0,0 +1,165 @@
"""
The :mod:`sklearn.utils.metaestimators` module includes utilities for meta-estimators.
"""
# Author: Joel Nothman
# Andreas Mueller
# License: BSD
from abc import ABCMeta, abstractmethod
from contextlib import suppress
from typing import Any, List
import numpy as np
from ..base import BaseEstimator
from ..utils import _safe_indexing
from ..utils._tags import _safe_tags
from ._available_if import available_if
__all__ = ["available_if"]
class _BaseComposition(BaseEstimator, metaclass=ABCMeta):
"""Handles parameter management for classifiers composed of named estimators."""
steps: List[Any]
@abstractmethod
def __init__(self):
pass
def _get_params(self, attr, deep=True):
out = super().get_params(deep=deep)
if not deep:
return out
estimators = getattr(self, attr)
try:
out.update(estimators)
except (TypeError, ValueError):
# Ignore TypeError for cases where estimators is not a list of
# (name, estimator) and ignore ValueError when the list is not
# formatted correctly. This is to prevent errors when calling
# `set_params`. `BaseEstimator.set_params` calls `get_params` which
# can error for invalid values for `estimators`.
return out
for name, estimator in estimators:
if hasattr(estimator, "get_params"):
for key, value in estimator.get_params(deep=True).items():
out["%s__%s" % (name, key)] = value
return out
def _set_params(self, attr, **params):
# Ensure strict ordering of parameter setting:
# 1. All steps
if attr in params:
setattr(self, attr, params.pop(attr))
# 2. Replace items with estimators in params
items = getattr(self, attr)
if isinstance(items, list) and items:
# Get item names used to identify valid names in params
# `zip` raises a TypeError when `items` does not contain
# elements of length 2
with suppress(TypeError):
item_names, _ = zip(*items)
for name in list(params.keys()):
if "__" not in name and name in item_names:
self._replace_estimator(attr, name, params.pop(name))
# 3. Step parameters and other initialisation arguments
super().set_params(**params)
return self
def _replace_estimator(self, attr, name, new_val):
# assumes `name` is a valid estimator name
new_estimators = list(getattr(self, attr))
for i, (estimator_name, _) in enumerate(new_estimators):
if estimator_name == name:
new_estimators[i] = (name, new_val)
break
setattr(self, attr, new_estimators)
def _validate_names(self, names):
if len(set(names)) != len(names):
raise ValueError("Names provided are not unique: {0!r}".format(list(names)))
invalid_names = set(names).intersection(self.get_params(deep=False))
if invalid_names:
raise ValueError(
"Estimator names conflict with constructor arguments: {0!r}".format(
sorted(invalid_names)
)
)
invalid_names = [name for name in names if "__" in name]
if invalid_names:
raise ValueError(
"Estimator names must not contain __: got {0!r}".format(invalid_names)
)
def _safe_split(estimator, X, y, indices, train_indices=None):
"""Create subset of dataset and properly handle kernels.
Slice X, y according to indices for cross-validation, but take care of
precomputed kernel-matrices or pairwise affinities / distances.
If ``estimator._pairwise is True``, X needs to be square and
we slice rows and columns. If ``train_indices`` is not None,
we slice rows using ``indices`` (assumed the test set) and columns
using ``train_indices``, indicating the training set.
Labels y will always be indexed only along the first axis.
Parameters
----------
estimator : object
Estimator to determine whether we should slice only rows or rows and
columns.
X : array-like, sparse matrix or iterable
Data to be indexed. If ``estimator._pairwise is True``,
this needs to be a square array-like or sparse matrix.
y : array-like, sparse matrix or iterable
Targets to be indexed.
indices : array of int
Rows to select from X and y.
If ``estimator._pairwise is True`` and ``train_indices is None``
then ``indices`` will also be used to slice columns.
train_indices : array of int or None, default=None
If ``estimator._pairwise is True`` and ``train_indices is not None``,
then ``train_indices`` will be used to slice the columns of X.
Returns
-------
X_subset : array-like, sparse matrix or list
Indexed data.
y_subset : array-like, sparse matrix or list
Indexed targets.
"""
if _safe_tags(estimator, key="pairwise"):
if not hasattr(X, "shape"):
raise ValueError(
"Precomputed kernels or affinity matrices have "
"to be passed as arrays or sparse matrices."
)
# X is a precomputed square kernel matrix
if X.shape[0] != X.shape[1]:
raise ValueError("X should be a square kernel matrix")
if train_indices is None:
X_subset = X[np.ix_(indices, indices)]
else:
X_subset = X[np.ix_(indices, train_indices)]
else:
X_subset = _safe_indexing(X, indices)
if y is not None:
y_subset = _safe_indexing(y, indices)
else:
y_subset = None
return X_subset, y_subset
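# Illustrative sketch (comment only, hypothetical precomputed kernel): for a
# pairwise estimator with a 4x4 kernel K, indices=[2, 3] and
# train_indices=[0, 1],
#     X_subset = K[np.ix_([2, 3], [0, 1])]
# i.e. the (2, 2) block of test-vs-train kernel values, while y is always
# sliced along the first axis only.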
@@ -0,0 +1,564 @@
"""
The :mod:`sklearn.utils.multiclass` module includes utilities to handle
multiclass/multioutput target in classifiers.
"""
# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi
#
# License: BSD 3 clause
import warnings
from collections.abc import Sequence
from itertools import chain
import numpy as np
from scipy.sparse import issparse
from ..utils._array_api import get_namespace
from ..utils.fixes import VisibleDeprecationWarning
from .validation import _assert_all_finite, check_array
def _unique_multiclass(y):
xp, is_array_api_compliant = get_namespace(y)
if hasattr(y, "__array__") or is_array_api_compliant:
return xp.unique_values(xp.asarray(y))
else:
return set(y)
def _unique_indicator(y):
xp, _ = get_namespace(y)
return xp.arange(
check_array(y, input_name="y", accept_sparse=["csr", "csc", "coo"]).shape[1]
)
_FN_UNIQUE_LABELS = {
"binary": _unique_multiclass,
"multiclass": _unique_multiclass,
"multilabel-indicator": _unique_indicator,
}
def unique_labels(*ys):
"""Extract an ordered array of unique labels.
We don't allow:
- mix of multilabel and multiclass (single label) targets
- mix of label indicator matrix and anything else,
because there are no explicit labels
- mix of label indicator matrices of different sizes
- mix of string and integer labels
At the moment, we also don't allow "multiclass-multioutput" input type.
Parameters
----------
*ys : array-likes
Label values.
Returns
-------
out : ndarray of shape (n_unique_labels,)
An ordered array of unique labels.
Examples
--------
>>> from sklearn.utils.multiclass import unique_labels
>>> unique_labels([3, 5, 5, 5, 7, 7])
array([3, 5, 7])
>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
array([1, 2, 3, 4])
>>> unique_labels([1, 2, 10], [5, 11])
array([ 1, 2, 5, 10, 11])
"""
xp, is_array_api_compliant = get_namespace(*ys)
if not ys:
raise ValueError("No argument has been passed.")
# Check that we don't mix label format
ys_types = set(type_of_target(x) for x in ys)
if ys_types == {"binary", "multiclass"}:
ys_types = {"multiclass"}
if len(ys_types) > 1:
raise ValueError("Mix type of y not allowed, got types %s" % ys_types)
label_type = ys_types.pop()
# Check consistency for the indicator format
if (
label_type == "multilabel-indicator"
and len(
set(
check_array(y, accept_sparse=["csr", "csc", "coo"]).shape[1] for y in ys
)
)
> 1
):
raise ValueError(
"Multi-label binary indicator input with different numbers of labels"
)
# Get the unique set of labels
_unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
if not _unique_labels:
raise ValueError("Unknown label type: %s" % repr(ys))
if is_array_api_compliant:
# array_api does not allow for mixed dtypes
unique_ys = xp.concat([_unique_labels(y) for y in ys])
return xp.unique_values(unique_ys)
ys_labels = set(chain.from_iterable((i for i in _unique_labels(y)) for y in ys))
# Check that we don't mix string type with number type
if len(set(isinstance(label, str) for label in ys_labels)) > 1:
raise ValueError("Mix of label input types (string and number)")
return xp.asarray(sorted(ys_labels))
def _is_integral_float(y):
xp, is_array_api_compliant = get_namespace(y)
return xp.isdtype(y.dtype, "real floating") and bool(
xp.all(xp.astype((xp.astype(y, xp.int64)), y.dtype) == y)
)
def is_multilabel(y):
"""Check if ``y`` is in a multilabel format.
Parameters
----------
y : ndarray of shape (n_samples,)
Target values.
Returns
-------
out : bool
Return ``True`` if ``y`` is in a multilabel format, else ``False``.
Examples
--------
>>> import numpy as np
>>> from sklearn.utils.multiclass import is_multilabel
>>> is_multilabel([0, 1, 0, 1])
False
>>> is_multilabel([[1], [0, 2], []])
False
>>> is_multilabel(np.array([[1, 0], [0, 0]]))
True
>>> is_multilabel(np.array([[1], [0], [0]]))
False
>>> is_multilabel(np.array([[1, 0, 0]]))
True
"""
xp, is_array_api_compliant = get_namespace(y)
if hasattr(y, "__array__") or isinstance(y, Sequence) or is_array_api_compliant:
# DeprecationWarning will be replaced by ValueError, see NEP 34
# https://numpy.org/neps/nep-0034-infer-dtype-is-object.html
check_y_kwargs = dict(
accept_sparse=True,
allow_nd=True,
force_all_finite=False,
ensure_2d=False,
ensure_min_samples=0,
ensure_min_features=0,
)
with warnings.catch_warnings():
warnings.simplefilter("error", VisibleDeprecationWarning)
try:
y = check_array(y, dtype=None, **check_y_kwargs)
except (VisibleDeprecationWarning, ValueError) as e:
if str(e).startswith("Complex data not supported"):
raise
# dtype=object should be provided explicitly for ragged arrays,
# see NEP 34
y = check_array(y, dtype=object, **check_y_kwargs)
if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1):
return False
if issparse(y):
if y.format in ("dok", "lil"):
y = y.tocsr()
labels = xp.unique_values(y.data)
return (
len(y.data) == 0
or (labels.size == 1 or (labels.size == 2) and (0 in labels))
and (y.dtype.kind in "biu" or _is_integral_float(labels)) # bool, int, uint
)
else:
labels = xp.unique_values(y)
return labels.shape[0] < 3 and (
xp.isdtype(y.dtype, ("bool", "signed integer", "unsigned integer"))
or _is_integral_float(labels)
)
def check_classification_targets(y):
"""Ensure that target y is of a non-regression type.
Only the following target types (as defined in type_of_target) are allowed:
'binary', 'multiclass', 'multiclass-multioutput',
'multilabel-indicator', 'multilabel-sequences'
Parameters
----------
y : array-like
Target values.
"""
y_type = type_of_target(y, input_name="y")
if y_type not in [
"binary",
"multiclass",
"multiclass-multioutput",
"multilabel-indicator",
"multilabel-sequences",
]:
raise ValueError(
f"Unknown label type: {y_type}. Maybe you are trying to fit a "
"classifier, which expects discrete classes on a "
"regression target with continuous values."
)
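# Illustrative sketch of the intended behaviour (values chosen ad hoc):
# >>> check_classification_targets([0, 1, 1, 0])   # binary target: passes silently
# >>> check_classification_targets([0.1, 0.6])     # continuous target: raises ValueError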
def type_of_target(y, input_name=""):
"""Determine the type of data indicated by the target.
Note that this type is the most specific type that can be inferred.
For example:
* ``binary`` is more specific but compatible with ``multiclass``.
* ``multiclass`` of integers is more specific but compatible with
``continuous``.
* ``multilabel-indicator`` is more specific but compatible with
``multiclass-multioutput``.
Parameters
----------
y : {array-like, sparse matrix}
Target values. If a sparse matrix, `y` is expected to be a
CSR/CSC matrix.
input_name : str, default=""
The data name used to construct the error message.
.. versionadded:: 1.1.0
Returns
-------
target_type : str
One of:
* 'continuous': `y` is an array-like of floats that are not all
integers, and is 1d or a column vector.
* 'continuous-multioutput': `y` is a 2d array of floats that are
not all integers, and both dimensions are of size > 1.
* 'binary': `y` contains <= 2 discrete values and is 1d or a column
vector.
* 'multiclass': `y` contains more than two discrete values, is not a
sequence of sequences, and is 1d or a column vector.
* 'multiclass-multioutput': `y` is a 2d array that contains more
than two discrete values, is not a sequence of sequences, and both
dimensions are of size > 1.
* 'multilabel-indicator': `y` is a label indicator matrix, an array
of two dimensions with at least two columns, and at most 2 unique
values.
* 'unknown': `y` is array-like but none of the above, such as a 3d
array, sequence of sequences, or an array of non-sequence objects.
Examples
--------
>>> from sklearn.utils.multiclass import type_of_target
>>> import numpy as np
>>> type_of_target([0.1, 0.6])
'continuous'
>>> type_of_target([1, -1, -1, 1])
'binary'
>>> type_of_target(['a', 'b', 'a'])
'binary'
>>> type_of_target([1.0, 2.0])
'binary'
>>> type_of_target([1, 0, 2])
'multiclass'
>>> type_of_target([1.0, 0.0, 3.0])
'multiclass'
>>> type_of_target(['a', 'b', 'c'])
'multiclass'
>>> type_of_target(np.array([[1, 2], [3, 1]]))
'multiclass-multioutput'
>>> type_of_target([[1, 2]])
'multilabel-indicator'
>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
'continuous-multioutput'
>>> type_of_target(np.array([[0, 1], [1, 1]]))
'multilabel-indicator'
"""
xp, is_array_api_compliant = get_namespace(y)
valid = (
(isinstance(y, Sequence) or issparse(y) or hasattr(y, "__array__"))
and not isinstance(y, str)
or is_array_api_compliant
)
if not valid:
raise ValueError(
"Expected array-like (array or non-string sequence), got %r" % y
)
sparse_pandas = y.__class__.__name__ in ["SparseSeries", "SparseArray"]
if sparse_pandas:
raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")
if is_multilabel(y):
return "multilabel-indicator"
# DeprecationWarning will be replaced by ValueError, see NEP 34
# https://numpy.org/neps/nep-0034-infer-dtype-is-object.html
# We therefore catch both deprecation (NumPy < 1.24) warning and
# value error (NumPy >= 1.24).
check_y_kwargs = dict(
accept_sparse=True,
allow_nd=True,
force_all_finite=False,
ensure_2d=False,
ensure_min_samples=0,
ensure_min_features=0,
)
with warnings.catch_warnings():
warnings.simplefilter("error", VisibleDeprecationWarning)
if not issparse(y):
try:
y = check_array(y, dtype=None, **check_y_kwargs)
except (VisibleDeprecationWarning, ValueError) as e:
if str(e).startswith("Complex data not supported"):
raise
# dtype=object should be provided explicitly for ragged arrays,
# see NEP 34
y = check_array(y, dtype=object, **check_y_kwargs)
try:
# TODO(1.7): Change to ValueError when byte labels is deprecated.
# labels in bytes format
first_row_or_val = y[[0], :] if issparse(y) else y[0]
if isinstance(first_row_or_val, bytes):
warnings.warn(
(
"Support for labels represented as bytes is deprecated in v1.5 and"
" will error in v1.7. Convert the labels to a string or integer"
" format."
),
FutureWarning,
)
# The old sequence of sequences format
if (
not hasattr(first_row_or_val, "__array__")
and isinstance(first_row_or_val, Sequence)
and not isinstance(first_row_or_val, str)
):
raise ValueError(
"You appear to be using a legacy multi-label data"
" representation. Sequence of sequences are no"
" longer supported; use a binary array or sparse"
" matrix instead - the MultiLabelBinarizer"
" transformer can convert to this format."
)
except IndexError:
pass
# Invalid inputs
if y.ndim not in (1, 2):
# Number of dimension greater than 2: [[[1, 2]]]
return "unknown"
if not min(y.shape):
# Empty ndarray: []/[[]]
if y.ndim == 1:
# 1-D empty array: []
return "binary" # []
# 2-D empty array: [[]]
return "unknown"
if not issparse(y) and y.dtype == object and not isinstance(y.flat[0], str):
# [obj_1] and not ["label_1"]
return "unknown"
# Check if multioutput
if y.ndim == 2 and y.shape[1] > 1:
suffix = "-multioutput" # [[1, 2], [1, 2]]
else:
suffix = "" # [1, 2, 3] or [[1], [2], [3]]
# Check float and contains non-integer float values
if xp.isdtype(y.dtype, "real floating"):
# [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]
data = y.data if issparse(y) else y
if xp.any(data != xp.astype(data, int)):
_assert_all_finite(data, input_name=input_name)
return "continuous" + suffix
# Check multiclass
if issparse(first_row_or_val):
first_row_or_val = first_row_or_val.data
if xp.unique_values(y).shape[0] > 2 or (y.ndim == 2 and len(first_row_or_val) > 1):
# [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
return "multiclass" + suffix
else:
return "binary" # [1, 2] or [["a"], ["b"]]
def _check_partial_fit_first_call(clf, classes=None):
"""Private helper function for factorizing common classes param logic.
Estimators that implement the ``partial_fit`` API need to be provided with
the list of possible classes at the first call to partial_fit.
Subsequent calls to partial_fit should check that ``classes`` is still
consistent with a previous value of ``clf.classes_`` when provided.
This function returns True if it detects that this was the first call to
``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
set on ``clf``.
"""
if getattr(clf, "classes_", None) is None and classes is None:
raise ValueError("classes must be passed on the first call to partial_fit.")
elif classes is not None:
if getattr(clf, "classes_", None) is not None:
if not np.array_equal(clf.classes_, unique_labels(classes)):
raise ValueError(
"`classes=%r` is not the same as on last call "
"to partial_fit, was: %r" % (classes, clf.classes_)
)
else:
# This is the first call to partial_fit
clf.classes_ = unique_labels(classes)
return True
# classes is None and clf.classes_ has already previously been set:
# nothing to do
return False
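# Illustrative sketch (using a throwaway object in place of a real estimator):
# >>> class _Clf: pass
# >>> clf = _Clf()
# >>> _check_partial_fit_first_call(clf, classes=[0, 1, 2])   # first call: sets classes_
# True
# >>> clf.classes_
# array([0, 1, 2])
# >>> _check_partial_fit_first_call(clf)                      # later calls: nothing to do
# False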
def class_distribution(y, sample_weight=None):
"""Compute class priors from multioutput-multiclass target data.
Parameters
----------
y : {array-like, sparse matrix} of size (n_samples, n_outputs)
The labels for each example.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
Returns
-------
classes : list of size n_outputs of ndarray of size (n_classes,)
List of classes for each column.
n_classes : list of int of size n_outputs
Number of classes in each column.
class_prior : list of size n_outputs of ndarray of size (n_classes,)
Class distribution of each column.
"""
classes = []
n_classes = []
class_prior = []
n_samples, n_outputs = y.shape
if sample_weight is not None:
sample_weight = np.asarray(sample_weight)
if issparse(y):
y = y.tocsc()
y_nnz = np.diff(y.indptr)
for k in range(n_outputs):
col_nonzero = y.indices[y.indptr[k] : y.indptr[k + 1]]
# separate sample weights for zero and non-zero elements
if sample_weight is not None:
nz_samp_weight = sample_weight[col_nonzero]
zeros_samp_weight_sum = np.sum(sample_weight) - np.sum(nz_samp_weight)
else:
nz_samp_weight = None
zeros_samp_weight_sum = y.shape[0] - y_nnz[k]
classes_k, y_k = np.unique(
y.data[y.indptr[k] : y.indptr[k + 1]], return_inverse=True
)
class_prior_k = np.bincount(y_k, weights=nz_samp_weight)
# An explicit zero was found, combine its weight with the weight
# of the implicit zeros
if 0 in classes_k:
class_prior_k[classes_k == 0] += zeros_samp_weight_sum
# If there is an implicit zero and it is not in classes and
# class_prior, make an entry for it
if 0 not in classes_k and y_nnz[k] < y.shape[0]:
classes_k = np.insert(classes_k, 0, 0)
class_prior_k = np.insert(class_prior_k, 0, zeros_samp_weight_sum)
classes.append(classes_k)
n_classes.append(classes_k.shape[0])
class_prior.append(class_prior_k / class_prior_k.sum())
else:
for k in range(n_outputs):
classes_k, y_k = np.unique(y[:, k], return_inverse=True)
classes.append(classes_k)
n_classes.append(classes_k.shape[0])
class_prior_k = np.bincount(y_k, weights=sample_weight)
class_prior.append(class_prior_k / class_prior_k.sum())
return (classes, n_classes, class_prior)
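# Illustrative sketch with a small dense multioutput target (ad hoc values):
# >>> y = np.array([[1, 0], [2, 0], [1, 3]])
# >>> classes, n_classes, class_prior = class_distribution(y)
# >>> classes
# [array([1, 2]), array([0, 3])]
# >>> n_classes
# [2, 2]
# >>> [np.round(p, 2) for p in class_prior]
# [array([0.67, 0.33]), array([0.67, 0.33])]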
def _ovr_decision_function(predictions, confidences, n_classes):
"""Compute a continuous, tie-breaking OvR decision function from OvO.
It is important to include a continuous value, not only votes,
to make computing AUC or calibration meaningful.
Parameters
----------
predictions : array-like of shape (n_samples, n_classifiers)
Predicted classes for each binary classifier.
confidences : array-like of shape (n_samples, n_classifiers)
Decision functions or predicted probabilities for positive class
for each binary classifier.
n_classes : int
Number of classes. n_classifiers must be
``n_classes * (n_classes - 1 ) / 2``.
"""
n_samples = predictions.shape[0]
votes = np.zeros((n_samples, n_classes))
sum_of_confidences = np.zeros((n_samples, n_classes))
k = 0
for i in range(n_classes):
for j in range(i + 1, n_classes):
sum_of_confidences[:, i] -= confidences[:, k]
sum_of_confidences[:, j] += confidences[:, k]
votes[predictions[:, k] == 0, i] += 1
votes[predictions[:, k] == 1, j] += 1
k += 1
# Monotonically transform the sum_of_confidences to (-1/3, 1/3)
# and add it with votes. The monotonic transformation is
# f: x -> x / (3 * (|x| + 1)), it uses 1/3 instead of 1/2
# to ensure that we won't reach the limits and change vote order.
# The motivation is to use confidence levels as a way to break ties in
# the votes without switching any decision made based on a difference
# of 1 vote.
transformed_confidences = sum_of_confidences / (
3 * (np.abs(sum_of_confidences) + 1)
)
return votes + transformed_confidences
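# Illustrative sketch for 3 classes (OvO pairs (0, 1), (0, 2), (1, 2); ad hoc values):
# >>> preds = np.array([[1, 0, 0]])       # class 1 beats 0, 0 beats 2, 1 beats 2
# >>> confs = np.array([[0.5, -0.2, 0.3]])
# >>> scores = _ovr_decision_function(preds, confs, n_classes=3)
# >>> np.argmax(scores, axis=1)           # raw votes are [1, 2, 0]; class 1 wins
# array([1])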
@@ -0,0 +1,21 @@
"""Export fast murmurhash C/C++ routines + cython wrappers"""
from ..utils._typedefs cimport int32_t, uint32_t
# The C API is disabled for now, since it requires -I flags to get
# compilation to work even when these functions are not used.
# cdef extern from "MurmurHash3.h":
# void MurmurHash3_x86_32(void* key, int len, unsigned int seed,
# void* out)
#
# void MurmurHash3_x86_128(void* key, int len, unsigned int seed,
# void* out)
#
# void MurmurHash3_x64_128(void* key, int len, unsigned int seed,
# void* out)
cpdef uint32_t murmurhash3_int_u32(int key, unsigned int seed)
cpdef int32_t murmurhash3_int_s32(int key, unsigned int seed)
cpdef uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed)
cpdef int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed)
@@ -0,0 +1,136 @@
"""Cython wrapper for MurmurHash3 non-cryptographic hash function.
MurmurHash is an extensively tested and very fast hash function that has
good distribution properties suitable for machine learning use cases
such as feature hashing and random projections.
The original C++ code by Austin Appleby is released in the public domain
and can be found here:
https://code.google.com/p/smhasher/
"""
# Author: Olivier Grisel <olivier.grisel@ensta.org>
#
# License: BSD 3 clause
from ..utils._typedefs cimport int32_t, uint32_t
import numpy as np
cdef extern from "src/MurmurHash3.h":
void MurmurHash3_x86_32(void *key, int len, uint32_t seed, void *out)
void MurmurHash3_x86_128(void *key, int len, uint32_t seed, void *out)
void MurmurHash3_x64_128 (void *key, int len, uint32_t seed, void *out)
cpdef uint32_t murmurhash3_int_u32(int key, unsigned int seed):
"""Compute the 32bit murmurhash3 of a int key at seed."""
cdef uint32_t out
MurmurHash3_x86_32(&key, sizeof(int), seed, &out)
return out
cpdef int32_t murmurhash3_int_s32(int key, unsigned int seed):
"""Compute the 32bit murmurhash3 of a int key at seed."""
cdef int32_t out
MurmurHash3_x86_32(&key, sizeof(int), seed, &out)
return out
cpdef uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed):
"""Compute the 32bit murmurhash3 of a bytes key at seed."""
cdef uint32_t out
MurmurHash3_x86_32(<char*> key, len(key), seed, &out)
return out
cpdef int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed):
"""Compute the 32bit murmurhash3 of a bytes key at seed."""
cdef int32_t out
MurmurHash3_x86_32(<char*> key, len(key), seed, &out)
return out
def _murmurhash3_bytes_array_u32(
const int32_t[:] key,
unsigned int seed,
):
"""Compute 32bit murmurhash3 hashes of a key int array at seed."""
# TODO make it possible to pass preallocated output array
cdef:
uint32_t[:] out = np.zeros(key.size, np.uint32)
Py_ssize_t i
for i in range(key.shape[0]):
out[i] = murmurhash3_int_u32(key[i], seed)
return np.asarray(out)
def _murmurhash3_bytes_array_s32(
const int32_t[:] key,
unsigned int seed,
):
"""Compute 32bit murmurhash3 hashes of a key int array at seed."""
# TODO make it possible to pass preallocated output array
cdef:
int32_t[:] out = np.zeros(key.size, np.int32)
Py_ssize_t i
for i in range(key.shape[0]):
out[i] = murmurhash3_int_s32(key[i], seed)
return np.asarray(out)
def murmurhash3_32(key, seed=0, positive=False):
"""Compute the 32bit murmurhash3 of key at seed.
The underlying implementation is MurmurHash3_x86_32, generating a
low-latency 32-bit hash suitable for implementing lookup tables, Bloom
filters, count min sketch or feature hashing.
Parameters
----------
key : np.int32, bytes, unicode or ndarray of dtype=np.int32
The physical object to hash.
seed : int, default=0
Integer seed for the hashing algorithm.
positive : bool, default=False
True: the result is cast to an unsigned int
from 0 to 2 ** 32 - 1
False: the result is cast to a signed int
from -(2 ** 31) to 2 ** 31 - 1
Examples
--------
>>> from sklearn.utils import murmurhash3_32
>>> murmurhash3_32(b"Hello World!", seed=42)
3565178
"""
if isinstance(key, bytes):
if positive:
return murmurhash3_bytes_u32(key, seed)
else:
return murmurhash3_bytes_s32(key, seed)
elif isinstance(key, unicode):
if positive:
return murmurhash3_bytes_u32(key.encode('utf-8'), seed)
else:
return murmurhash3_bytes_s32(key.encode('utf-8'), seed)
elif isinstance(key, int) or isinstance(key, np.int32):
if positive:
return murmurhash3_int_u32(<int32_t>key, seed)
else:
return murmurhash3_int_s32(<int32_t>key, seed)
elif isinstance(key, np.ndarray):
if key.dtype != np.int32:
raise TypeError(
"key.dtype should be int32, got %s" % key.dtype)
if positive:
return _murmurhash3_bytes_array_u32(key.ravel(), seed).reshape(key.shape)
else:
return _murmurhash3_bytes_array_s32(key.ravel(), seed).reshape(key.shape)
else:
raise TypeError(
"key %r with type %s is not supported. "
"Explicit conversion to bytes is required" % (key, type(key)))
@@ -0,0 +1,379 @@
"""
Our own implementation of the Newton algorithm
Unlike the scipy.optimize version, this version of the Newton conjugate
gradient solver uses only one function call to retrieve the
func value, the gradient value and a callable for the Hessian matvec
product. If the function call is very expensive (e.g. for logistic
regression with large design matrix), this approach gives very
significant speedups.
"""
# This is a modified file from scipy.optimize
# Original authors: Travis Oliphant, Eric Jones
# Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour
# License: BSD
import warnings
import numpy as np
import scipy
from ..exceptions import ConvergenceWarning
from .fixes import line_search_wolfe1, line_search_wolfe2
class _LineSearchError(RuntimeError):
pass
def _line_search_wolfe12(
f, fprime, xk, pk, gfk, old_fval, old_old_fval, verbose=0, **kwargs
):
"""
Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
suitable step length is not found, and raise an exception if a
suitable step length is not found.
Raises
------
_LineSearchError
If no suitable step size is found.
"""
is_verbose = verbose >= 2
eps = 16 * np.finfo(np.asarray(old_fval).dtype).eps
if is_verbose:
print(" Line Search")
print(f" eps=16 * finfo.eps={eps}")
print(" try line search wolfe1")
ret = line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)
if is_verbose:
_not_ = "not " if ret[0] is None else ""
print(" wolfe1 line search was " + _not_ + "successful")
if ret[0] is None:
# Have a look at the line_search method of our NewtonSolver class. We borrow
# the logic from there
# Deal with relative loss differences around machine precision.
args = kwargs.get("args", tuple())
fval = f(xk + pk, *args)
tiny_loss = np.abs(old_fval * eps)
loss_improvement = fval - old_fval
check = np.abs(loss_improvement) <= tiny_loss
if is_verbose:
print(
" check loss |improvement| <= eps * |loss_old|:"
f" {np.abs(loss_improvement)} <= {tiny_loss} {check}"
)
if check:
# 2.1 Check sum of absolute gradients as alternative condition.
sum_abs_grad_old = scipy.linalg.norm(gfk, ord=1)
grad = fprime(xk + pk, *args)
sum_abs_grad = scipy.linalg.norm(grad, ord=1)
check = sum_abs_grad < sum_abs_grad_old
if is_verbose:
print(
" check sum(|gradient|) < sum(|gradient_old|): "
f"{sum_abs_grad} < {sum_abs_grad_old} {check}"
)
if check:
ret = (
1.0, # step size
ret[1] + 1, # number of function evaluations
ret[2] + 1, # number of gradient evaluations
fval,
old_fval,
grad,
)
if ret[0] is None:
# line search failed: try different one.
# TODO: It seems that the new check for the sum of absolute gradients above
# catches all cases that, earlier, ended up here. In fact, our tests never
# trigger this "if branch" here and we can consider to remove it.
if is_verbose:
print(" last resort: try line search wolfe2")
ret = line_search_wolfe2(
f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs
)
if is_verbose:
_not_ = "not " if ret[0] is None else ""
print(" wolfe2 line search was " + _not_ + "successful")
if ret[0] is None:
raise _LineSearchError()
return ret
def _cg(fhess_p, fgrad, maxiter, tol, verbose=0):
"""
Iteratively solve the linear system 'fhess_p . xsupi = -fgrad'
with the conjugate gradient method.
Parameters
----------
fhess_p : callable
Function that takes a vector as a parameter and returns the
matrix product of the Hessian and that vector.
fgrad : ndarray of shape (n_features,) or (n_features + 1,)
Gradient vector.
maxiter : int
Number of CG iterations.
tol : float
Stopping criterion.
Returns
-------
xsupi : ndarray of shape (n_features,) or (n_features + 1,)
Estimated solution.
"""
eps = 16 * np.finfo(np.float64).eps
xsupi = np.zeros(len(fgrad), dtype=fgrad.dtype)
ri = np.copy(fgrad)  # residual = fhess_p @ xsupi + fgrad (xsupi starts at 0)
psupi = -ri
i = 0
dri0 = np.dot(ri, ri)
# We also keep track of |p_i|^2.
psupi_norm2 = dri0
is_verbose = verbose >= 2
while i <= maxiter:
if np.sum(np.abs(ri)) <= tol:
if is_verbose:
print(
f" Inner CG solver iteration {i} stopped with\n"
f" sum(|residuals|) <= tol: {np.sum(np.abs(ri))} <= {tol}"
)
break
Ap = fhess_p(psupi)
# check curvature
curv = np.dot(psupi, Ap)
if 0 <= curv <= eps * psupi_norm2:
# See https://arxiv.org/abs/1803.02924, Algo 1 Capped Conjugate Gradient.
if is_verbose:
print(
f" Inner CG solver iteration {i} stopped with\n"
f" tiny_|p| = eps * ||p||^2, eps = {eps}, "
f"squred L2 norm ||p||^2 = {psupi_norm2}\n"
f" curvature <= tiny_|p|: {curv} <= {eps * psupi_norm2}"
)
break
elif curv < 0:
if i > 0:
if is_verbose:
print(
f" Inner CG solver iteration {i} stopped with negative "
f"curvature, curvature = {curv}"
)
break
else:
# fall back to steepest descent direction
xsupi += dri0 / curv * psupi
if is_verbose:
print(" Inner CG solver iteration 0 fell back to steepest descent")
break
alphai = dri0 / curv
xsupi += alphai * psupi
ri += alphai * Ap
dri1 = np.dot(ri, ri)
betai = dri1 / dri0
psupi = -ri + betai * psupi
# We use |p_i|^2 = |r_i|^2 + beta_i^2 |p_{i-1}|^2
psupi_norm2 = dri1 + betai**2 * psupi_norm2
i = i + 1
dri0 = dri1 # update np.dot(ri,ri) for next time.
if is_verbose and i > maxiter:
print(
f" Inner CG solver stopped reaching maxiter={i - 1} with "
f"sum(|residuals|) = {np.sum(np.abs(ri))}"
)
return xsupi
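# Illustrative sketch on a tiny SPD system (ad hoc values): the returned vector
# approximately solves fhess_p(x) = -fgrad, i.e. it is a Newton-style step.
# >>> A = np.array([[3.0, 1.0], [1.0, 2.0]])
# >>> b = np.array([1.0, 1.0])
# >>> x = _cg(lambda p: A @ p, b, maxiter=10, tol=1e-12)
# >>> np.allclose(A @ x, -b)
# True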
def _newton_cg(
grad_hess,
func,
grad,
x0,
args=(),
tol=1e-4,
maxiter=100,
maxinner=200,
line_search=True,
warn=True,
verbose=0,
):
"""
Minimization of scalar function of one or more variables using the
Newton-CG algorithm.
Parameters
----------
grad_hess : callable
Should return the gradient and a callable returning the matvec product
of the Hessian.
func : callable
Should return the value of the function.
grad : callable
Should return the gradient. This is used by the line search
functions.
x0 : array of float
Initial guess.
args : tuple, default=()
Arguments passed to func_grad_hess, func and grad.
tol : float, default=1e-4
Stopping criterion. The iteration will stop when
``max{|g_i|, i = 1, ..., n} <= tol``
where ``g_i`` is the i-th component of the gradient.
maxiter : int, default=100
Number of Newton iterations.
maxinner : int, default=200
Number of CG iterations.
line_search : bool, default=True
Whether to use a line search or not.
warn : bool, default=True
Whether to warn when the algorithm did not converge.
Returns
-------
xk : ndarray of float
Estimated minimum.
"""
x0 = np.asarray(x0).flatten()
xk = np.copy(x0)
k = 0
if line_search:
old_fval = func(x0, *args)
old_old_fval = None
else:
old_fval = 0
is_verbose = verbose > 0
# Outer loop: our Newton iteration
while k < maxiter:
# Compute a search direction pk by applying the CG method to
# del2 f(xk) p = - fgrad f(xk) starting from 0.
fgrad, fhess_p = grad_hess(xk, *args)
absgrad = np.abs(fgrad)
max_absgrad = np.max(absgrad)
check = max_absgrad <= tol
if is_verbose:
print(f"Newton-CG iter = {k}")
print(" Check Convergence")
print(f" max |gradient| <= tol: {max_absgrad} <= {tol} {check}")
if check:
break
maggrad = np.sum(absgrad)
eta = min([0.5, np.sqrt(maggrad)])
termcond = eta * maggrad
# Inner loop: solve the Newton update by conjugate gradient, to
# avoid inverting the Hessian
xsupi = _cg(fhess_p, fgrad, maxiter=maxinner, tol=termcond, verbose=verbose)
alphak = 1.0
if line_search:
try:
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
func,
grad,
xk,
xsupi,
fgrad,
old_fval,
old_old_fval,
verbose=verbose,
args=args,
)
except _LineSearchError:
warnings.warn("Line Search failed")
break
xk += alphak * xsupi # upcast if necessary
k += 1
if warn and k >= maxiter:
warnings.warn(
(
f"newton-cg failed to converge at loss = {old_fval}. Increase the"
" number of iterations."
),
ConvergenceWarning,
)
elif is_verbose:
print(f" Solver did converge at loss = {old_fval}.")
return xk, k
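# Illustrative sketch minimizing a strongly convex quadratic (ad hoc values);
# `grad_hess` returns the gradient and a Hessian-vector product callable.
# >>> A = np.array([[3.0, 1.0], [1.0, 2.0]])
# >>> target = np.array([1.0, -2.0])
# >>> func = lambda x: 0.5 * (x - target) @ A @ (x - target)
# >>> grad = lambda x: A @ (x - target)
# >>> grad_hess = lambda x: (A @ (x - target), lambda p: A @ p)
# >>> xmin, n_iter = _newton_cg(grad_hess, func, grad, x0=np.zeros(2))
# >>> np.allclose(xmin, target, atol=1e-3)
# True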
def _check_optimize_result(solver, result, max_iter=None, extra_warning_msg=None):
"""Check the OptimizeResult for successful convergence
Parameters
----------
solver : str
Solver name. Currently only `lbfgs` is supported.
result : OptimizeResult
Result of the scipy.optimize.minimize function.
max_iter : int, default=None
Expected maximum number of iterations.
extra_warning_msg : str, default=None
Extra warning message.
Returns
-------
n_iter : int
Number of iterations.
"""
# handle both scipy and scikit-learn solver names
if solver == "lbfgs":
if result.status != 0:
try:
# The message is already decoded in scipy>=1.6.0
result_message = result.message.decode("latin1")
except AttributeError:
result_message = result.message
warning_msg = (
"{} failed to converge (status={}):\n{}.\n\n"
"Increase the number of iterations (max_iter) "
"or scale the data as shown in:\n"
" https://scikit-learn.org/stable/modules/"
"preprocessing.html"
).format(solver, result.status, result_message)
if extra_warning_msg is not None:
warning_msg += "\n" + extra_warning_msg
warnings.warn(warning_msg, ConvergenceWarning, stacklevel=2)
if max_iter is not None:
# In scipy <= 1.0.0, nit may exceed maxiter for lbfgs.
# See https://github.com/scipy/scipy/issues/7854
n_iter_i = min(result.nit, max_iter)
else:
n_iter_i = result.nit
else:
raise NotImplementedError
return n_iter_i
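# Illustrative sketch with scipy's L-BFGS-B on a trivial objective (ad hoc values);
# a clean result produces no ConvergenceWarning and the iteration count is returned.
# >>> from scipy.optimize import minimize
# >>> res = minimize(lambda w: float((w ** 2).sum()), x0=np.ones(3), method="L-BFGS-B")
# >>> n_iter = _check_optimize_result("lbfgs", res)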
@@ -0,0 +1,129 @@
"""
The :mod:`sklearn.utils.parallel` customizes `joblib` tools for scikit-learn usage.
"""
import functools
import warnings
from functools import update_wrapper
import joblib
from .._config import config_context, get_config
def _with_config(delayed_func, config):
"""Helper function that intends to attach a config to a delayed function."""
if hasattr(delayed_func, "with_config"):
return delayed_func.with_config(config)
else:
warnings.warn(
(
"`sklearn.utils.parallel.Parallel` needs to be used in "
"conjunction with `sklearn.utils.parallel.delayed` instead of "
"`joblib.delayed` to correctly propagate the scikit-learn "
"configuration to the joblib workers."
),
UserWarning,
)
return delayed_func
class Parallel(joblib.Parallel):
"""Tweak of :class:`joblib.Parallel` that propagates the scikit-learn configuration.
This subclass of :class:`joblib.Parallel` ensures that the active configuration
(thread-local) of scikit-learn is propagated to the parallel workers for the
duration of the execution of the parallel tasks.
The API does not change and you can refer to :class:`joblib.Parallel`
documentation for more details.
.. versionadded:: 1.3
"""
def __call__(self, iterable):
"""Dispatch the tasks and return the results.
Parameters
----------
iterable : iterable
Iterable containing tuples of (delayed_function, args, kwargs) that should
be consumed.
Returns
-------
results : list
List of results of the tasks.
"""
# Capture the thread-local scikit-learn configuration at the time
# Parallel.__call__ is issued since the tasks can be dispatched
# in a different thread depending on the backend and on the value of
# pre_dispatch and n_jobs.
config = get_config()
iterable_with_config = (
(_with_config(delayed_func, config), args, kwargs)
for delayed_func, args, kwargs in iterable
)
return super().__call__(iterable_with_config)
# remove when https://github.com/joblib/joblib/issues/1071 is fixed
def delayed(function):
"""Decorator used to capture the arguments of a function.
This alternative to `joblib.delayed` is meant to be used in conjunction
with `sklearn.utils.parallel.Parallel`. The latter captures the scikit-
learn configuration by calling `sklearn.get_config()` in the current
thread, prior to dispatching the first task. The captured configuration is
then propagated and enabled for the duration of the execution of the
delayed function in the joblib workers.
.. versionchanged:: 1.3
`delayed` was moved from `sklearn.utils.fixes` to `sklearn.utils.parallel`
in scikit-learn 1.3.
Parameters
----------
function : callable
The function to be delayed.
Returns
-------
output : tuple
Tuple containing the delayed function, the positional arguments, and the
keyword arguments.
"""
@functools.wraps(function)
def delayed_function(*args, **kwargs):
return _FuncWrapper(function), args, kwargs
return delayed_function
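# Illustrative usage sketch (ad hoc values): pairing `delayed` with this module's
# `Parallel` so that the active scikit-learn configuration reaches the workers.
# >>> from sklearn import config_context
# >>> with config_context(assume_finite=True):
# ...     results = Parallel(n_jobs=2)(delayed(abs)(i) for i in [-1, -2, -3])
# >>> results
# [1, 2, 3]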
class _FuncWrapper:
"""Load the global configuration before calling the function."""
def __init__(self, function):
self.function = function
update_wrapper(self, self.function)
def with_config(self, config):
self.config = config
return self
def __call__(self, *args, **kwargs):
config = getattr(self, "config", None)
if config is None:
warnings.warn(
(
"`sklearn.utils.parallel.delayed` should be used with"
" `sklearn.utils.parallel.Parallel` to make it possible to"
" propagate the scikit-learn configuration of the current thread to"
" the joblib workers."
),
UserWarning,
)
config = {}
with config_context(**config):
return self.function(*args, **kwargs)
@@ -0,0 +1,103 @@
"""
The :mod:`sklearn.utils.random` module includes utilities for random sampling.
"""
# Author: Hamzeh Alsalhi <ha258@cornell.edu>
#
# License: BSD 3 clause
import array
import numpy as np
import scipy.sparse as sp
from . import check_random_state
from ._random import sample_without_replacement
__all__ = ["sample_without_replacement"]
def _random_choice_csc(n_samples, classes, class_probability=None, random_state=None):
"""Generate a sparse random matrix given column class distributions
Parameters
----------
n_samples : int,
Number of samples to draw in each column.
classes : list of size n_outputs of arrays of size (n_classes,)
List of classes for each column.
class_probability : list of size n_outputs of arrays of \
shape (n_classes,), default=None
Class distribution of each column. If None, uniform distribution is
assumed.
random_state : int, RandomState instance or None, default=None
Controls the randomness of the sampled classes.
See :term:`Glossary <random_state>`.
Returns
-------
random_matrix : sparse csc matrix of size (n_samples, n_outputs)
"""
data = array.array("i")
indices = array.array("i")
indptr = array.array("i", [0])
for j in range(len(classes)):
classes[j] = np.asarray(classes[j])
if classes[j].dtype.kind != "i":
raise ValueError("class dtype %s is not supported" % classes[j].dtype)
classes[j] = classes[j].astype(np.int64, copy=False)
# use uniform distribution if no class_probability is given
if class_probability is None:
class_prob_j = np.empty(shape=classes[j].shape[0])
class_prob_j.fill(1 / classes[j].shape[0])
else:
class_prob_j = np.asarray(class_probability[j])
if not np.isclose(np.sum(class_prob_j), 1.0):
raise ValueError(
"Probability array at index {0} does not sum to one".format(j)
)
if class_prob_j.shape[0] != classes[j].shape[0]:
raise ValueError(
"classes[{0}] (length {1}) and "
"class_probability[{0}] (length {2}) have "
"different length.".format(
j, classes[j].shape[0], class_prob_j.shape[0]
)
)
# If 0 is not present in the classes insert it with a probability 0.0
if 0 not in classes[j]:
classes[j] = np.insert(classes[j], 0, 0)
class_prob_j = np.insert(class_prob_j, 0, 0.0)
# If there are nonzero classes choose randomly using class_probability
rng = check_random_state(random_state)
if classes[j].shape[0] > 1:
index_class_0 = np.flatnonzero(classes[j] == 0).item()
p_nonzero = 1 - class_prob_j[index_class_0]
nnz = int(n_samples * p_nonzero)
ind_sample = sample_without_replacement(
n_population=n_samples, n_samples=nnz, random_state=random_state
)
indices.extend(ind_sample)
# Normalize probabilities for the nonzero elements
classes_j_nonzero = classes[j] != 0
class_probability_nz = class_prob_j[classes_j_nonzero]
class_probability_nz_norm = class_probability_nz / np.sum(
class_probability_nz
)
classes_ind = np.searchsorted(
class_probability_nz_norm.cumsum(), rng.uniform(size=nnz)
)
data.extend(classes[j][classes_j_nonzero][classes_ind])
indptr.append(len(indices))
return sp.csc_matrix((data, indices, indptr), (n_samples, len(classes)), dtype=int)
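# Illustrative sketch drawing one binary output column (ad hoc values):
# >>> mat = _random_choice_csc(
# ...     n_samples=5,
# ...     classes=[np.array([0, 1])],
# ...     class_probability=[np.array([0.5, 0.5])],
# ...     random_state=0,
# ... )
# >>> mat.shape
# (5, 1)
# >>> mat.nnz
# 2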
@@ -0,0 +1,745 @@
"""
The :mod:`sklearn.utils.sparsefuncs` module includes a collection of utilities to
work with sparse matrices and arrays.
"""
# Authors: Manoj Kumar
# Thomas Unterthiner
# Giorgio Patrini
#
# License: BSD 3 clause
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import LinearOperator
from ..utils.fixes import _sparse_min_max, _sparse_nan_min_max
from ..utils.validation import _check_sample_weight
from .sparsefuncs_fast import (
csc_mean_variance_axis0 as _csc_mean_var_axis0,
)
from .sparsefuncs_fast import (
csr_mean_variance_axis0 as _csr_mean_var_axis0,
)
from .sparsefuncs_fast import (
incr_mean_variance_axis0 as _incr_mean_var_axis0,
)
def _raise_typeerror(X):
"""Raises a TypeError if X is not a CSR or CSC matrix"""
input_type = X.format if sp.issparse(X) else type(X)
err = "Expected a CSR or CSC sparse matrix, got %s." % input_type
raise TypeError(err)
def _raise_error_wrong_axis(axis):
if axis not in (0, 1):
raise ValueError(
"Unknown axis value: %d. Use 0 for rows, or 1 for columns" % axis
)
def inplace_csr_column_scale(X, scale):
"""Inplace column scaling of a CSR matrix.
Scale each feature of the data matrix by multiplying with specific scale
provided by the caller assuming a (n_samples, n_features) shape.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix to normalize using the variance of the features.
It should be of CSR format.
scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Array of precomputed feature-wise values to use for scaling.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 3, 4, 4, 4])
>>> indices = np.array([0, 1, 2, 2])
>>> data = np.array([8, 1, 2, 5])
>>> scale = np.array([2, 3, 2])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 1, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.inplace_csr_column_scale(csr, scale)
>>> csr.todense()
matrix([[16, 3, 4],
[ 0, 0, 10],
[ 0, 0, 0],
[ 0, 0, 0]])
"""
assert scale.shape[0] == X.shape[1]
X.data *= scale.take(X.indices, mode="clip")
def inplace_csr_row_scale(X, scale):
"""Inplace row scaling of a CSR matrix.
Scale each sample of the data matrix by multiplying with specific scale
provided by the caller assuming a (n_samples, n_features) shape.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix to be scaled. It should be of CSR format.
scale : ndarray of float of shape (n_samples,)
Array of precomputed sample-wise values to use for scaling.
"""
assert scale.shape[0] == X.shape[0]
X.data *= np.repeat(scale, np.diff(X.indptr))
def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):
"""Compute mean and variance along an axis on a CSR or CSC matrix.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Input data. It can be of CSR or CSC format.
axis : {0, 1}
Axis along which the mean and variance should be computed.
weights : ndarray of shape (n_samples,) or (n_features,), default=None
If axis is set to 0 shape is (n_samples,) or
if axis is set to 1 shape is (n_features,).
If it is set to None, then samples are equally weighted.
.. versionadded:: 0.24
return_sum_weights : bool, default=False
If True, returns the sum of weights seen for each feature
if `axis=0` or each sample if `axis=1`.
.. versionadded:: 0.24
Returns
-------
means : ndarray of shape (n_features,), dtype=floating
Feature-wise means.
variances : ndarray of shape (n_features,), dtype=floating
Feature-wise variances.
sum_weights : ndarray of shape (n_features,), dtype=floating
Returned if `return_sum_weights` is `True`.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 3, 4, 4, 4])
>>> indices = np.array([0, 1, 2, 2])
>>> data = np.array([8, 1, 2, 5])
>>> scale = np.array([2, 3, 2])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 1, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.mean_variance_axis(csr, axis=0)
(array([2. , 0.25, 1.75]), array([12. , 0.1875, 4.1875]))
"""
_raise_error_wrong_axis(axis)
if sp.issparse(X) and X.format == "csr":
if axis == 0:
return _csr_mean_var_axis0(
X, weights=weights, return_sum_weights=return_sum_weights
)
else:
return _csc_mean_var_axis0(
X.T, weights=weights, return_sum_weights=return_sum_weights
)
elif sp.issparse(X) and X.format == "csc":
if axis == 0:
return _csc_mean_var_axis0(
X, weights=weights, return_sum_weights=return_sum_weights
)
else:
return _csr_mean_var_axis0(
X.T, weights=weights, return_sum_weights=return_sum_weights
)
else:
_raise_typeerror(X)
def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n, weights=None):
"""Compute incremental mean and variance along an axis on a CSR or CSC matrix.
last_mean, last_var are the statistics computed at the last step by this
function. Both must be initialized to 0-arrays of the proper size, i.e.
the number of features in X. last_n is the number of samples encountered
until now.
Parameters
----------
X : CSR or CSC sparse matrix of shape (n_samples, n_features)
Input data.
axis : {0, 1}
Axis along which the mean and variance should be computed.
last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
Array of means to update with the new data X.
Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.
last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating
Array of variances to update with the new data X.
Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.
last_n : float or ndarray of shape (n_features,) or (n_samples,), \
dtype=floating
Sum of the weights seen so far, excluding the current weights
If not float, it should be of shape (n_features,) if
axis=0 or (n_samples,) if axis=1. If float it corresponds to
having same weights for all samples (or features).
weights : ndarray of shape (n_samples,) or (n_features,), default=None
If axis is set to 0 shape is (n_samples,) or
if axis is set to 1 shape is (n_features,).
If it is set to None, then samples are equally weighted.
.. versionadded:: 0.24
Returns
-------
means : ndarray of shape (n_features,) or (n_samples,), dtype=floating
Updated feature-wise means if axis = 0 or
sample-wise means if axis = 1.
variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating
Updated feature-wise variances if axis = 0 or
sample-wise variances if axis = 1.
n : ndarray of shape (n_features,) or (n_samples,), dtype=integral
Updated number of seen samples per feature if axis=0
or number of seen features per sample if axis=1.
If weights is not None, n is a sum of the weights of the seen
samples or features instead of the actual number of seen
samples or features.
Notes
-----
NaNs are ignored in the algorithm.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 3, 4, 4, 4])
>>> indices = np.array([0, 1, 2, 2])
>>> data = np.array([8, 1, 2, 5])
>>> scale = np.array([2, 3, 2])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 1, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.incr_mean_variance_axis(
... csr, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3), last_n=2
... )
(array([1.3..., 0.1..., 1.1...]), array([8.8..., 0.1..., 3.4...]),
array([6., 6., 6.]))
"""
_raise_error_wrong_axis(axis)
if not (sp.issparse(X) and X.format in ("csc", "csr")):
_raise_typeerror(X)
if np.size(last_n) == 1:
last_n = np.full(last_mean.shape, last_n, dtype=last_mean.dtype)
if not (np.size(last_mean) == np.size(last_var) == np.size(last_n)):
raise ValueError("last_mean, last_var, last_n do not have the same shapes.")
if axis == 1:
if np.size(last_mean) != X.shape[0]:
raise ValueError(
"If axis=1, then last_mean, last_n, last_var should be of "
f"size n_samples {X.shape[0]} (Got {np.size(last_mean)})."
)
else: # axis == 0
if np.size(last_mean) != X.shape[1]:
raise ValueError(
"If axis=0, then last_mean, last_n, last_var should be of "
f"size n_features {X.shape[1]} (Got {np.size(last_mean)})."
)
X = X.T if axis == 1 else X
if weights is not None:
weights = _check_sample_weight(weights, X, dtype=X.dtype)
return _incr_mean_var_axis0(
X, last_mean=last_mean, last_var=last_var, last_n=last_n, weights=weights
)
def inplace_column_scale(X, scale):
"""Inplace column scaling of a CSC/CSR matrix.
Scale each feature of the data matrix by multiplying with specific scale
provided by the caller assuming a (n_samples, n_features) shape.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix to normalize using the variance of the features. It should be
of CSC or CSR format.
scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Array of precomputed feature-wise values to use for scaling.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 3, 4, 4, 4])
>>> indices = np.array([0, 1, 2, 2])
>>> data = np.array([8, 1, 2, 5])
>>> scale = np.array([2, 3, 2])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 1, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.inplace_column_scale(csr, scale)
>>> csr.todense()
matrix([[16, 3, 4],
[ 0, 0, 10],
[ 0, 0, 0],
[ 0, 0, 0]])
"""
if sp.issparse(X) and X.format == "csc":
inplace_csr_row_scale(X.T, scale)
elif sp.issparse(X) and X.format == "csr":
inplace_csr_column_scale(X, scale)
else:
_raise_typeerror(X)
def inplace_row_scale(X, scale):
"""Inplace row scaling of a CSR or CSC matrix.
Scale each row of the data matrix by multiplying with specific scale
provided by the caller assuming a (n_samples, n_features) shape.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix to be scaled. It should be of CSR or CSC format.
scale : ndarray of shape (n_samples,), dtype={np.float32, np.float64}
Array of precomputed sample-wise values to use for scaling.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 2, 3, 4, 5])
>>> indices = np.array([0, 1, 2, 3, 3])
>>> data = np.array([8, 1, 2, 5, 6])
>>> scale = np.array([2, 3, 4, 5])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 1, 0, 0],
[0, 0, 2, 0],
[0, 0, 0, 5],
[0, 0, 0, 6]])
>>> sparsefuncs.inplace_row_scale(csr, scale)
>>> csr.todense()
matrix([[16, 2, 0, 0],
[ 0, 0, 6, 0],
[ 0, 0, 0, 20],
[ 0, 0, 0, 30]])
"""
if sp.issparse(X) and X.format == "csc":
inplace_csr_column_scale(X.T, scale)
elif sp.issparse(X) and X.format == "csr":
inplace_csr_row_scale(X, scale)
else:
_raise_typeerror(X)
def inplace_swap_row_csc(X, m, n):
"""Swap two rows of a CSC matrix in-place.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of
CSC format.
m : int
Index of the row of X to be swapped.
n : int
Index of the row of X to be swapped.
"""
for t in [m, n]:
if isinstance(t, np.ndarray):
raise TypeError("m and n should be valid integers")
if m < 0:
m += X.shape[0]
if n < 0:
n += X.shape[0]
m_mask = X.indices == m
X.indices[X.indices == n] = m
X.indices[m_mask] = n
def inplace_swap_row_csr(X, m, n):
"""Swap two rows of a CSR matrix in-place.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of
CSR format.
m : int
Index of the row of X to be swapped.
n : int
Index of the row of X to be swapped.
"""
for t in [m, n]:
if isinstance(t, np.ndarray):
raise TypeError("m and n should be valid integers")
if m < 0:
m += X.shape[0]
if n < 0:
n += X.shape[0]
# The following swapping makes life easier since m is assumed to be the
# smaller integer below.
if m > n:
m, n = n, m
indptr = X.indptr
m_start = indptr[m]
m_stop = indptr[m + 1]
n_start = indptr[n]
n_stop = indptr[n + 1]
nz_m = m_stop - m_start
nz_n = n_stop - n_start
if nz_m != nz_n:
# Modify indptr first
X.indptr[m + 2 : n] += nz_n - nz_m
X.indptr[m + 1] = m_start + nz_n
X.indptr[n] = n_stop - nz_m
X.indices = np.concatenate(
[
X.indices[:m_start],
X.indices[n_start:n_stop],
X.indices[m_stop:n_start],
X.indices[m_start:m_stop],
X.indices[n_stop:],
]
)
X.data = np.concatenate(
[
X.data[:m_start],
X.data[n_start:n_stop],
X.data[m_stop:n_start],
X.data[m_start:m_stop],
X.data[n_stop:],
]
)
def inplace_swap_row(X, m, n):
"""
Swap two rows of a CSC/CSR matrix in-place.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of CSR or
CSC format.
m : int
Index of the row of X to be swapped.
n : int
Index of the row of X to be swapped.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 2, 3, 3, 3])
>>> indices = np.array([0, 2, 2])
>>> data = np.array([8, 2, 5])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 0, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.inplace_swap_row(csr, 0, 1)
>>> csr.todense()
matrix([[0, 0, 5],
[8, 0, 2],
[0, 0, 0],
[0, 0, 0]])
"""
if sp.issparse(X) and X.format == "csc":
inplace_swap_row_csc(X, m, n)
elif sp.issparse(X) and X.format == "csr":
inplace_swap_row_csr(X, m, n)
else:
_raise_typeerror(X)
def inplace_swap_column(X, m, n):
"""
Swap two columns of a CSC/CSR matrix in-place.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two columns are to be swapped. It should be of
CSR or CSC format.
m : int
Index of the column of X to be swapped.
n : int
Index of the column of X to be swapped.
Examples
--------
>>> from sklearn.utils import sparsefuncs
>>> from scipy import sparse
>>> import numpy as np
>>> indptr = np.array([0, 2, 3, 3, 3])
>>> indices = np.array([0, 2, 2])
>>> data = np.array([8, 2, 5])
>>> csr = sparse.csr_matrix((data, indices, indptr))
>>> csr.todense()
matrix([[8, 0, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
>>> sparsefuncs.inplace_swap_column(csr, 0, 1)
>>> csr.todense()
matrix([[0, 8, 2],
[0, 0, 5],
[0, 0, 0],
[0, 0, 0]])
"""
if m < 0:
m += X.shape[1]
if n < 0:
n += X.shape[1]
if sp.issparse(X) and X.format == "csc":
inplace_swap_row_csr(X, m, n)
elif sp.issparse(X) and X.format == "csr":
inplace_swap_row_csc(X, m, n)
else:
_raise_typeerror(X)
def min_max_axis(X, axis, ignore_nan=False):
"""Compute minimum and maximum along an axis on a CSR or CSC matrix.
Optionally ignore NaN values.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Input data. It should be of CSR or CSC format.
axis : {0, 1}
Axis along which the minima and maxima should be computed.
ignore_nan : bool, default=False
Ignore or pass through NaN values.
.. versionadded:: 0.20
Returns
-------
mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Feature-wise minima.
maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Feature-wise maxima.
"""
if sp.issparse(X) and X.format in ("csr", "csc"):
if ignore_nan:
return _sparse_nan_min_max(X, axis=axis)
else:
return _sparse_min_max(X, axis=axis)
else:
_raise_typeerror(X)
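# Illustrative sketch (ad hoc values); implicit zeros participate in the result:
# >>> X = sp.csr_matrix(np.array([[0.0, 2.0], [-1.0, 0.0]]))
# >>> mins, maxs = min_max_axis(X, axis=0)
# >>> mins
# array([-1.,  0.])
# >>> maxs
# array([0., 2.])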
def count_nonzero(X, axis=None, sample_weight=None):
"""A variant of X.getnnz() with extension to weighting on axis 0.
Useful in efficiently calculating multilabel metrics.
Parameters
----------
X : sparse matrix of shape (n_samples, n_labels)
Input data. It should be of CSR format.
axis : {0, 1}, default=None
The axis on which the data is aggregated.
sample_weight : array-like of shape (n_samples,), default=None
Weight for each row of X.
Returns
-------
nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)
Number of non-zero values in the array along a given axis. Otherwise,
the total number of non-zero values in the array is returned.
"""
if axis == -1:
axis = 1
elif axis == -2:
axis = 0
elif X.format != "csr":
raise TypeError("Expected CSR sparse format, got {0}".format(X.format))
# We rely here on the fact that np.diff(X.indptr) for a CSR
# will return the number of nonzero entries in each row.
# A bincount over X.indices will return the number of nonzeros
# in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
if axis is None:
if sample_weight is None:
return X.nnz
else:
return np.dot(np.diff(X.indptr), sample_weight)
elif axis == 1:
out = np.diff(X.indptr)
if sample_weight is None:
# astype here is for consistency with axis=0 dtype
return out.astype("intp")
return out * sample_weight
elif axis == 0:
if sample_weight is None:
return np.bincount(X.indices, minlength=X.shape[1])
else:
weights = np.repeat(sample_weight, np.diff(X.indptr))
return np.bincount(X.indices, minlength=X.shape[1], weights=weights)
else:
raise ValueError("Unsupported axis: {0}".format(axis))
def _get_median(data, n_zeros):
"""Compute the median of data with n_zeros additional zeros.
This function is used to support sparse matrices; it modifies data
in-place.
"""
n_elems = len(data) + n_zeros
if not n_elems:
return np.nan
n_negative = np.count_nonzero(data < 0)
middle, is_odd = divmod(n_elems, 2)
data.sort()
if is_odd:
return _get_elem_at_rank(middle, data, n_negative, n_zeros)
return (
_get_elem_at_rank(middle - 1, data, n_negative, n_zeros)
+ _get_elem_at_rank(middle, data, n_negative, n_zeros)
) / 2.0
def _get_elem_at_rank(rank, data, n_negative, n_zeros):
"""Find the value in data augmented with n_zeros for the given rank"""
if rank < n_negative:
return data[rank]
if rank - n_negative < n_zeros:
return 0
return data[rank - n_zeros]
def csc_median_axis_0(X):
"""Find the median across axis 0 of a CSC matrix.
It is equivalent to doing np.median(X, axis=0).
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
Input data. It should be of CSC format.
Returns
-------
median : ndarray of shape (n_features,)
Median.
"""
if not (sp.issparse(X) and X.format == "csc"):
raise TypeError("Expected matrix of CSC format, got %s" % X.format)
indptr = X.indptr
n_samples, n_features = X.shape
median = np.zeros(n_features)
for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])):
# Prevent modifying X in place
data = np.copy(X.data[start:end])
nz = n_samples - data.size
median[f_ind] = _get_median(data, nz)
return median
def _implicit_column_offset(X, offset):
"""Create an implicitly offset linear operator.
This is used by PCA on sparse data to avoid densifying the whole data
matrix.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features)
offset : ndarray of shape (n_features,)
Returns
-------
centered : LinearOperator
"""
offset = offset[None, :]
XT = X.T
return LinearOperator(
matvec=lambda x: X @ x - offset @ x,
matmat=lambda x: X @ x - offset @ x,
rmatvec=lambda x: XT @ x - (offset * x.sum()),
rmatmat=lambda x: XT @ x - offset.T @ x.sum(axis=0)[None, :],
dtype=X.dtype,
shape=X.shape,
)
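# Illustrative sketch (ad hoc values): the operator behaves like the densified,
# column-centered matrix without ever materializing it.
# >>> X = sp.csr_matrix(np.array([[1.0, 2.0], [3.0, 4.0]]))
# >>> offset = np.asarray(X.mean(axis=0)).ravel()
# >>> op = _implicit_column_offset(X, offset)
# >>> np.allclose(op.matvec(np.ones(2)), (X.toarray() - offset) @ np.ones(2))
# True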
@@ -0,0 +1,640 @@
"""
The :mod:`sklearn.utils.sparsefuncs_fast` module includes a collection of utilities to
work with sparse matrices and arrays written in Cython.
"""
# Authors: Mathieu Blondel
# Olivier Grisel
# Peter Prettenhofer
# Lars Buitinck
# Giorgio Patrini
#
# License: BSD 3 clause
from libc.math cimport fabs, sqrt, isnan
from libc.stdint cimport intptr_t
import numpy as np
from cython cimport floating
from ..utils._typedefs cimport float64_t, int32_t, int64_t, intp_t, uint64_t
ctypedef fused integral:
int32_t
int64_t
def csr_row_norms(X):
"""Squared L2 norm of each row in CSR matrix X."""
if X.dtype not in [np.float32, np.float64]:
X = X.astype(np.float64)
return _sqeuclidean_row_norms_sparse(X.data, X.indptr)
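# Illustrative sketch (ad hoc values):
# >>> from scipy import sparse
# >>> X = sparse.csr_matrix(np.array([[3.0, 4.0], [0.0, 1.0]]))
# >>> csr_row_norms(X)
# array([25.,  1.])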
def _sqeuclidean_row_norms_sparse(
const floating[::1] X_data,
const integral[::1] X_indptr,
):
cdef:
integral n_samples = X_indptr.shape[0] - 1
integral i, j
dtype = np.float32 if floating is float else np.float64
cdef floating[::1] squared_row_norms = np.zeros(n_samples, dtype=dtype)
with nogil:
for i in range(n_samples):
for j in range(X_indptr[i], X_indptr[i + 1]):
squared_row_norms[i] += X_data[j] * X_data[j]
return np.asarray(squared_row_norms)
def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False):
"""Compute mean and variance along axis 0 on a CSR matrix
Uses a np.float64 accumulator.
Parameters
----------
X : CSR sparse matrix, shape (n_samples, n_features)
Input data.
weights : ndarray of shape (n_samples,), dtype=floating, default=None
If it is set to None samples will be equally weighted.
.. versionadded:: 0.24
return_sum_weights : bool, default=False
If True, returns the sum of weights seen for each feature.
.. versionadded:: 0.24
Returns
-------
means : float array with shape (n_features,)
Feature-wise means
variances : float array with shape (n_features,)
Feature-wise variances
sum_weights : ndarray of shape (n_features,), dtype=floating
Returned if return_sum_weights is True.
"""
if X.dtype not in [np.float32, np.float64]:
X = X.astype(np.float64)
if weights is None:
weights = np.ones(X.shape[0], dtype=X.dtype)
means, variances, sum_weights = _csr_mean_variance_axis0(
X.data, X.shape[0], X.shape[1], X.indices, X.indptr, weights)
if return_sum_weights:
return means, variances, sum_weights
return means, variances
def _csr_mean_variance_axis0(
const floating[::1] X_data,
uint64_t n_samples,
uint64_t n_features,
const integral[:] X_indices,
const integral[:] X_indptr,
const floating[:] weights,
):
# Implement the function here since variables using fused types
# cannot be declared directly and can only be passed as function arguments
cdef:
intp_t row_ind
uint64_t feature_idx
integral i, col_ind
float64_t diff
# means[j] contains the mean of feature j
float64_t[::1] means = np.zeros(n_features)
# variances[j] contains the variance of feature j
float64_t[::1] variances = np.zeros(n_features)
float64_t[::1] sum_weights = np.full(
fill_value=np.sum(weights, dtype=np.float64), shape=n_features
)
float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
float64_t[::1] correction = np.zeros(shape=n_features)
uint64_t[::1] counts = np.full(
fill_value=weights.shape[0], shape=n_features, dtype=np.uint64
)
uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
for row_ind in range(len(X_indptr) - 1):
for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]):
col_ind = X_indices[i]
if not isnan(X_data[i]):
means[col_ind] += <float64_t>(X_data[i]) * weights[row_ind]
# sum of weights where X[:, col_ind] is non-zero
sum_weights_nz[col_ind] += weights[row_ind]
# number of non-zero elements of X[:, col_ind]
counts_nz[col_ind] += 1
else:
# sum of weights where X[:, col_ind] is not nan
sum_weights[col_ind] -= weights[row_ind]
# number of non nan elements of X[:, col_ind]
counts[col_ind] -= 1
for feature_idx in range(n_features):
means[feature_idx] /= sum_weights[feature_idx]
for row_ind in range(len(X_indptr) - 1):
for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]):
col_ind = X_indices[i]
if not isnan(X_data[i]):
diff = X_data[i] - means[col_ind]
# correction term of the corrected 2 pass algorithm.
# See "Algorithms for computing the sample variance: analysis
# and recommendations", by Chan, Golub, and LeVeque.
correction[col_ind] += diff * weights[row_ind]
variances[col_ind] += diff * diff * weights[row_ind]
for feature_idx in range(n_features):
if counts[feature_idx] != counts_nz[feature_idx]:
correction[feature_idx] -= (
sum_weights[feature_idx] - sum_weights_nz[feature_idx]
) * means[feature_idx]
correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
if counts[feature_idx] != counts_nz[feature_idx]:
# only compute it when it's guaranteed to be non-zero to avoid
# catastrophic cancellation.
variances[feature_idx] += (
sum_weights[feature_idx] - sum_weights_nz[feature_idx]
) * means[feature_idx]**2
variances[feature_idx] = (
(variances[feature_idx] - correction[feature_idx]) /
sum_weights[feature_idx]
)
if floating is float:
return (
np.array(means, dtype=np.float32),
np.array(variances, dtype=np.float32),
np.array(sum_weights, dtype=np.float32),
)
else:
return (
np.asarray(means), np.asarray(variances), np.asarray(sum_weights)
)
def csc_mean_variance_axis0(X, weights=None, return_sum_weights=False):
"""Compute mean and variance along axis 0 on a CSC matrix
Uses a np.float64 accumulator.
Parameters
----------
X : CSC sparse matrix, shape (n_samples, n_features)
Input data.
weights : ndarray of shape (n_samples,), dtype=floating, default=None
If set to None, samples will be equally weighted.
.. versionadded:: 0.24
return_sum_weights : bool, default=False
If True, returns the sum of weights seen for each feature.
.. versionadded:: 0.24
Returns
-------
means : float array with shape (n_features,)
Feature-wise means
variances : float array with shape (n_features,)
Feature-wise variances
sum_weights : ndarray of shape (n_features,), dtype=floating
Returned if return_sum_weights is True.
"""
if X.dtype not in [np.float32, np.float64]:
X = X.astype(np.float64)
if weights is None:
weights = np.ones(X.shape[0], dtype=X.dtype)
means, variances, sum_weights = _csc_mean_variance_axis0(
X.data, X.shape[0], X.shape[1], X.indices, X.indptr, weights)
if return_sum_weights:
return means, variances, sum_weights
return means, variances
def _csc_mean_variance_axis0(
const floating[::1] X_data,
uint64_t n_samples,
uint64_t n_features,
const integral[:] X_indices,
const integral[:] X_indptr,
const floating[:] weights,
):
# Implement the function here since variables using fused types
# cannot be declared directly and can only be passed as function arguments
cdef:
integral i, row_ind
uint64_t feature_idx, col_ind
float64_t diff
# means[j] contains the mean of feature j
float64_t[::1] means = np.zeros(n_features)
# variances[j] contains the variance of feature j
float64_t[::1] variances = np.zeros(n_features)
float64_t[::1] sum_weights = np.full(
fill_value=np.sum(weights, dtype=np.float64), shape=n_features
)
float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
float64_t[::1] correction = np.zeros(shape=n_features)
uint64_t[::1] counts = np.full(
fill_value=weights.shape[0], shape=n_features, dtype=np.uint64
)
uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
for col_ind in range(n_features):
for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]):
row_ind = X_indices[i]
if not isnan(X_data[i]):
means[col_ind] += <float64_t>(X_data[i]) * weights[row_ind]
# sum of weights where X[:, col_ind] is non-zero
sum_weights_nz[col_ind] += weights[row_ind]
# number of non-zero elements of X[:, col_ind]
counts_nz[col_ind] += 1
else:
# sum of weights where X[:, col_ind] is not nan
sum_weights[col_ind] -= weights[row_ind]
# number of non nan elements of X[:, col_ind]
counts[col_ind] -= 1
for feature_idx in range(n_features):
means[feature_idx] /= sum_weights[feature_idx]
for col_ind in range(n_features):
for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]):
row_ind = X_indices[i]
if not isnan(X_data[i]):
diff = X_data[i] - means[col_ind]
# correction term of the corrected 2 pass algorithm.
# See "Algorithms for computing the sample variance: analysis
# and recommendations", by Chan, Golub, and LeVeque.
correction[col_ind] += diff * weights[row_ind]
variances[col_ind] += diff * diff * weights[row_ind]
for feature_idx in range(n_features):
if counts[feature_idx] != counts_nz[feature_idx]:
correction[feature_idx] -= (
sum_weights[feature_idx] - sum_weights_nz[feature_idx]
) * means[feature_idx]
correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
if counts[feature_idx] != counts_nz[feature_idx]:
# only compute it when it's guaranteed to be non-zero to avoid
# catastrophic cancellation.
variances[feature_idx] += (
sum_weights[feature_idx] - sum_weights_nz[feature_idx]
) * means[feature_idx]**2
variances[feature_idx] = (
(variances[feature_idx] - correction[feature_idx])
) / sum_weights[feature_idx]
if floating is float:
return (np.array(means, dtype=np.float32),
np.array(variances, dtype=np.float32),
np.array(sum_weights, dtype=np.float32))
else:
return (
np.asarray(means), np.asarray(variances), np.asarray(sum_weights)
)
def incr_mean_variance_axis0(X, last_mean, last_var, last_n, weights=None):
"""Compute mean and variance along axis 0 on a CSR or CSC matrix.
last_mean, last_var are the statistics computed at the last step by this
function. Both must be initialized to 0.0. last_n is the
number of samples encountered until now and is initialized at 0.
Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
Input data.
last_mean : float array with shape (n_features,)
Array of feature-wise means to update with the new data X.
last_var : float array with shape (n_features,)
Array of feature-wise var to update with the new data X.
last_n : float array with shape (n_features,)
Sum of the weights seen so far (if weights are all set to 1
this will be the same as number of samples seen so far, before X).
weights : float array with shape (n_samples,), default=None
If set to None, samples will be equally weighted.
Returns
-------
updated_mean : float array with shape (n_features,)
Feature-wise means
updated_variance : float array with shape (n_features,)
Feature-wise variances
updated_n : int array with shape (n_features,)
Updated number of samples seen
Notes
-----
NaNs are ignored during the computation.
References
----------
T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample
variance: recommendations, The American Statistician, Vol. 37, No. 3,
pp. 242-247
Also, see the non-sparse implementation of this in
`utils.extmath._incremental_mean_and_var`.
"""
if X.dtype not in [np.float32, np.float64]:
X = X.astype(np.float64)
X_dtype = X.dtype
if weights is None:
weights = np.ones(X.shape[0], dtype=X_dtype)
elif weights.dtype not in [np.float32, np.float64]:
weights = weights.astype(np.float64, copy=False)
if last_n.dtype not in [np.float32, np.float64]:
last_n = last_n.astype(np.float64, copy=False)
return _incr_mean_variance_axis0(X.data,
np.sum(weights),
X.shape[1],
X.indices,
X.indptr,
X.format,
last_mean.astype(X_dtype, copy=False),
last_var.astype(X_dtype, copy=False),
last_n.astype(X_dtype, copy=False),
weights.astype(X_dtype, copy=False))
def _incr_mean_variance_axis0(
const floating[:] X_data,
floating n_samples,
uint64_t n_features,
const int[:] X_indices,
# X_indptr might be either int32 or int64
const integral[:] X_indptr,
str X_format,
floating[:] last_mean,
floating[:] last_var,
floating[:] last_n,
# previous sum of the weights (ie float)
const floating[:] weights,
):
# Implement the function here since variables using fused types
# cannot be declared directly and can only be passed as function arguments
cdef:
uint64_t i
# last = stats until now
# new = the current increment
# updated = the aggregated stats
# when arrays, they are indexed by i per-feature
floating[::1] new_mean
floating[::1] new_var
floating[::1] updated_mean
floating[::1] updated_var
if floating is float:
dtype = np.float32
else:
dtype = np.float64
new_mean = np.zeros(n_features, dtype=dtype)
new_var = np.zeros_like(new_mean, dtype=dtype)
updated_mean = np.zeros_like(new_mean, dtype=dtype)
updated_var = np.zeros_like(new_mean, dtype=dtype)
cdef:
floating[::1] new_n
floating[::1] updated_n
floating[::1] last_over_new_n
# Obtain new stats first
updated_n = np.zeros(shape=n_features, dtype=dtype)
last_over_new_n = np.zeros_like(updated_n, dtype=dtype)
# X can be a CSR or CSC matrix
if X_format == 'csr':
new_mean, new_var, new_n = _csr_mean_variance_axis0(
X_data, n_samples, n_features, X_indices, X_indptr, weights)
else: # X_format == 'csc'
new_mean, new_var, new_n = _csc_mean_variance_axis0(
X_data, n_samples, n_features, X_indices, X_indptr, weights)
# First pass
cdef bint is_first_pass = True
for i in range(n_features):
if last_n[i] > 0:
is_first_pass = False
break
if is_first_pass:
return np.asarray(new_mean), np.asarray(new_var), np.asarray(new_n)
for i in range(n_features):
updated_n[i] = last_n[i] + new_n[i]
# Next passes
for i in range(n_features):
if new_n[i] > 0:
last_over_new_n[i] = dtype(last_n[i]) / dtype(new_n[i])
# Unnormalized stats
last_mean[i] *= last_n[i]
last_var[i] *= last_n[i]
new_mean[i] *= new_n[i]
new_var[i] *= new_n[i]
# Update stats
updated_var[i] = (
last_var[i] + new_var[i] +
last_over_new_n[i] / updated_n[i] *
(last_mean[i] / last_over_new_n[i] - new_mean[i])**2
)
updated_mean[i] = (last_mean[i] + new_mean[i]) / updated_n[i]
updated_var[i] /= updated_n[i]
else:
updated_var[i] = last_var[i]
updated_mean[i] = last_mean[i]
updated_n[i] = last_n[i]
return (
np.asarray(updated_mean),
np.asarray(updated_var),
np.asarray(updated_n),
)
def inplace_csr_row_normalize_l1(X):
"""Normalize inplace the rows of a CSR matrix or array by their L1 norm.
Parameters
----------
X : scipy.sparse.csr_matrix or scipy.sparse.csr_array, \
shape=(n_samples, n_features)
The input matrix or array to be modified inplace.
Examples
--------
>>> from scipy.sparse import csr_matrix
>>> from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
>>> X = csr_matrix(([1.0, 2.0, 3.0, 4.0], [0, 1, 2, 3], [0, 2, 3, 4]), shape=(3, 4))
>>> X.toarray()
array([[1., 2., 0., 0.],
[0., 0., 3., 0.],
[0., 0., 0., 4.]])
>>> inplace_csr_row_normalize_l1(X)
>>> X.toarray()
array([[0.33... , 0.66... , 0. , 0. ],
[0. , 0. , 1. , 0. ],
[0. , 0. , 0. , 1. ]])
"""
_inplace_csr_row_normalize_l1(X.data, X.shape, X.indices, X.indptr)
def _inplace_csr_row_normalize_l1(
floating[:] X_data,
shape,
const integral[:] X_indices,
const integral[:] X_indptr,
):
cdef:
uint64_t n_samples = shape[0]
# the column indices for row i are stored in:
# indices[indptr[i]:indices[i+1]]
# and their corresponding values are stored in:
# data[indptr[i]:indptr[i+1]]
uint64_t i
integral j
double sum_
for i in range(n_samples):
sum_ = 0.0
for j in range(X_indptr[i], X_indptr[i + 1]):
sum_ += fabs(X_data[j])
if sum_ == 0.0:
# do not normalize empty rows (can happen if CSR is not pruned
# correctly)
continue
for j in range(X_indptr[i], X_indptr[i + 1]):
X_data[j] /= sum_
def inplace_csr_row_normalize_l2(X):
"""Normalize inplace the rows of a CSR matrix or array by their L2 norm.
Parameters
----------
X : scipy.sparse.csr_matrix or scipy.sparse.csr_array, shape=(n_samples, n_features)
The input matrix or array to be modified inplace.
Examples
--------
>>> from scipy.sparse import csr_matrix
>>> from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
>>> X = csr_matrix(([1.0, 2.0, 3.0, 4.0], [0, 1, 2, 3], [0, 2, 3, 4]), shape=(3, 4))
>>> X.toarray()
array([[1., 2., 0., 0.],
[0., 0., 3., 0.],
[0., 0., 0., 4.]])
>>> inplace_csr_row_normalize_l2(X)
>>> X.toarray()
array([[0.44... , 0.89... , 0. , 0. ],
[0. , 0. , 1. , 0. ],
[0. , 0. , 0. , 1. ]])
"""
_inplace_csr_row_normalize_l2(X.data, X.shape, X.indices, X.indptr)
def _inplace_csr_row_normalize_l2(
floating[:] X_data,
shape,
const integral[:] X_indices,
const integral[:] X_indptr,
):
cdef:
uint64_t n_samples = shape[0]
uint64_t i
integral j
double sum_
for i in range(n_samples):
sum_ = 0.0
for j in range(X_indptr[i], X_indptr[i + 1]):
sum_ += (X_data[j] * X_data[j])
if sum_ == 0.0:
# do not normalize empty rows (can happen if CSR is not pruned
# correctly)
continue
sum_ = sqrt(sum_)
for j in range(X_indptr[i], X_indptr[i + 1]):
X_data[j] /= sum_
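For reference, the same row scaling is exposed through the public `sklearn.preprocessing.normalize` helper; a hedged sketch (normalize returns a copy by default, and copy=False would scale a CSR input in place):
import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import normalize

X = sp.csr_matrix(np.array([[1.0, 2.0, 0.0], [0.0, 0.0, 3.0]]))
X_l2 = normalize(X, norm="l2")  # new matrix; copy=False would scale X itself
row_norms = np.sqrt(np.asarray(X_l2.multiply(X_l2).sum(axis=1))).ravel()
assert np.allclose(row_norms, 1.0)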
def assign_rows_csr(
X,
const intptr_t[:] X_rows,
const intptr_t[:] out_rows,
floating[:, ::1] out,
):
"""Densify selected rows of a CSR matrix into a preallocated array.
Like out[out_rows] = X[X_rows].toarray() but without copying.
No-copy supported for both dtype=np.float32 and dtype=np.float64.
Parameters
----------
X : scipy.sparse.csr_matrix, shape=(n_samples, n_features)
X_rows : array, dtype=np.intp, shape=n_rows
out_rows : array, dtype=np.intp, shape=n_rows
out : array, shape=(arbitrary, n_features)
"""
cdef:
# intptr_t (npy_intp, np.intp in Python) is what np.where returns,
# but int is what scipy.sparse uses.
intp_t i, ind, j, k
intptr_t rX
const floating[:] data = X.data
const int32_t[:] indices = X.indices
const int32_t[:] indptr = X.indptr
if X_rows.shape[0] != out_rows.shape[0]:
raise ValueError("cannot assign %d rows to %d"
% (X_rows.shape[0], out_rows.shape[0]))
with nogil:
for k in range(out_rows.shape[0]):
out[out_rows[k]] = 0.0
for i in range(X_rows.shape[0]):
rX = X_rows[i]
for ind in range(indptr[rX], indptr[rX + 1]):
j = indices[ind]
out[out_rows[i], j] = data[ind]
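A pure SciPy/NumPy equivalent of the helper above, for illustration only (the array shapes and row selections are made up); `assign_rows_csr` computes the same assignment while avoiding the intermediate dense copy produced by `.toarray()`:
import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.arange(12, dtype=np.float64).reshape(4, 3))
out = np.zeros((2, 3), dtype=np.float64)
X_rows = np.array([0, 3], dtype=np.intp)
out_rows = np.array([1, 0], dtype=np.intp)

# Equivalent of assign_rows_csr(X, X_rows, out_rows, out):
out[out_rows] = X[X_rows].toarray()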
@@ -0,0 +1,346 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "MurmurHash3.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#include <stdlib.h>
#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
/* gcc version >= 4.4 4.1 = RHEL 5, 4.4 = RHEL 6.
* Don't inline for RHEL 5 gcc which is 4.1 */
#define FORCE_INLINE __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
return (x << r) | (x >> (32 - r));
}
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
return (x << r) | (x >> (64 - r));
}
#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
{
return p[i];
}
FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
{
return p[i];
}
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
FORCE_INLINE uint32_t fmix ( uint32_t h )
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
//----------
FORCE_INLINE uint64_t fmix ( uint64_t k )
{
k ^= k >> 33;
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
k ^= k >> 33;
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return k;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 4;
uint32_t h1 = seed;
uint32_t c1 = 0xcc9e2d51;
uint32_t c2 = 0x1b873593;
//----------
// body
const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
for(int i = -nblocks; i; i++)
{
uint32_t k1 = getblock(blocks,i);
k1 *= c1;
k1 = ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
uint32_t k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
}
//----------
// finalization
h1 ^= len;
h1 = fmix(h1);
*(uint32_t*)out = h1;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint32_t h1 = seed;
uint32_t h2 = seed;
uint32_t h3 = seed;
uint32_t h4 = seed;
uint32_t c1 = 0x239b961b;
uint32_t c2 = 0xab0e9789;
uint32_t c3 = 0x38b34ae5;
uint32_t c4 = 0xa1e38b93;
//----------
// body
const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
for(int i = -nblocks; i; i++)
{
uint32_t k1 = getblock(blocks,i*4+0);
uint32_t k2 = getblock(blocks,i*4+1);
uint32_t k3 = getblock(blocks,i*4+2);
uint32_t k4 = getblock(blocks,i*4+3);
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint32_t k1 = 0;
uint32_t k2 = 0;
uint32_t k3 = 0;
uint32_t k4 = 0;
switch(len & 15)
{
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
}
//----------
// finalization
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
h1 = fmix(h1);
h2 = fmix(h2);
h3 = fmix(h3);
h4 = fmix(h4);
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
((uint32_t*)out)[0] = h1;
((uint32_t*)out)[1] = h2;
((uint32_t*)out)[2] = h3;
((uint32_t*)out)[3] = h4;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64_t * blocks = (const uint64_t *)(data);
for(int i = 0; i < nblocks; i++)
{
uint64_t k1 = getblock(blocks,i*2+0);
uint64_t k2 = getblock(blocks,i*2+1);
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch(len & 15)
{
case 15: k2 ^= uint64_t(tail[14]) << 48;
case 14: k2 ^= uint64_t(tail[13]) << 40;
case 13: k2 ^= uint64_t(tail[12]) << 32;
case 12: k2 ^= uint64_t(tail[11]) << 24;
case 11: k2 ^= uint64_t(tail[10]) << 16;
case 10: k2 ^= uint64_t(tail[ 9]) << 8;
case 9: k2 ^= uint64_t(tail[ 8]) << 0;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= uint64_t(tail[ 7]) << 56;
case 7: k1 ^= uint64_t(tail[ 6]) << 48;
case 6: k1 ^= uint64_t(tail[ 5]) << 40;
case 5: k1 ^= uint64_t(tail[ 4]) << 32;
case 4: k1 ^= uint64_t(tail[ 3]) << 24;
case 3: k1 ^= uint64_t(tail[ 2]) << 16;
case 2: k1 ^= uint64_t(tail[ 1]) << 8;
case 1: k1 ^= uint64_t(tail[ 0]) << 0;
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
}
//----------
// finalization
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix(h1);
h2 = fmix(h2);
h1 += h2;
h2 += h1;
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------
@@ -0,0 +1,45 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // _MURMURHASH3_H_
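These C++ routines are exposed in Python through `sklearn.utils.murmurhash3_32` (the 32-bit x86 variant). A hedged usage sketch; the exact hash values are intentionally not reproduced here:
from sklearn.utils import murmurhash3_32

# Signed 32-bit hash of a string key with an explicit seed.
h_signed = murmurhash3_32("some-feature-name", seed=0)
# positive=True returns the hash interpreted as an unsigned 32-bit integer,
# which is convenient for hashing-trick style feature indexing.
h_unsigned = murmurhash3_32("some-feature-name", seed=0, positive=True)
assert 0 <= h_unsigned < 2 ** 32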
@@ -0,0 +1,69 @@
import numpy as np
from .extmath import stable_cumsum
def _weighted_percentile(array, sample_weight, percentile=50):
"""Compute weighted percentile
Computes the lower weighted percentile. If `array` is a 2D array, the
`percentile` is computed along axis 0.
.. versionchanged:: 0.24
Accepts 2D `array`.
Parameters
----------
array : 1D or 2D array
Values to take the weighted percentile of.
sample_weight : 1D or 2D array
Weights for each value in `array`. Must be same shape as `array` or
of shape `(array.shape[0],)`.
percentile : int or float, default=50
Percentile to compute. Must be a value between 0 and 100.
Returns
-------
percentile : int if `array` 1D, ndarray if `array` 2D
Weighted percentile.
"""
n_dim = array.ndim
if n_dim == 0:
return array[()]
if array.ndim == 1:
array = array.reshape((-1, 1))
# When sample_weight 1D, repeat for each array.shape[1]
if array.shape != sample_weight.shape and array.shape[0] == sample_weight.shape[0]:
sample_weight = np.tile(sample_weight, (array.shape[1], 1)).T
sorted_idx = np.argsort(array, axis=0)
sorted_weights = np.take_along_axis(sample_weight, sorted_idx, axis=0)
# Find index of median prediction for each sample
weight_cdf = stable_cumsum(sorted_weights, axis=0)
adjusted_percentile = percentile / 100 * weight_cdf[-1]
# For percentile=0, ignore leading observations with sample_weight=0. GH20528
mask = adjusted_percentile == 0
adjusted_percentile[mask] = np.nextafter(
adjusted_percentile[mask], adjusted_percentile[mask] + 1
)
percentile_idx = np.array(
[
np.searchsorted(weight_cdf[:, i], adjusted_percentile[i])
for i in range(weight_cdf.shape[1])
]
)
percentile_idx = np.array(percentile_idx)
# In rare cases, percentile_idx equals to sorted_idx.shape[0]
max_idx = sorted_idx.shape[0] - 1
percentile_idx = np.apply_along_axis(
lambda x: np.clip(x, 0, max_idx), axis=0, arr=percentile_idx
)
col_index = np.arange(array.shape[1])
percentile_in_sorted = sorted_idx[percentile_idx, col_index]
percentile = array[percentile_in_sorted, col_index]
return percentile[0] if n_dim == 1 else percentile
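A minimal 1-D illustration of the "lower weighted percentile" idea used above (illustrative values only; this is not the function's exact code path):
import numpy as np

values = np.array([1.0, 3.0, 5.0, 7.0])
weights = np.array([1.0, 1.0, 4.0, 1.0])

order = np.argsort(values)
cdf = np.cumsum(weights[order])
# Weighted median: first sorted value whose cumulative weight reaches 50%
# of the total weight.
idx = np.searchsorted(cdf, 0.5 * cdf[-1])
assert values[order[idx]] == 5.0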
@@ -0,0 +1,16 @@
import pytest
from numpy.testing import assert_allclose
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
@pytest.mark.parametrize("seed", range(100))
def test_init_arpack_v0(seed):
# check that the initialization samples from a uniform distribution
# where we can fix the random state
size = 1000
v0 = _init_arpack_v0(size, seed)
rng = check_random_state(seed)
assert_allclose(v0, rng.uniform(-1, 1, size=size))
@@ -0,0 +1,506 @@
import re
from functools import partial
import numpy
import pytest
from numpy.testing import assert_allclose
from sklearn._config import config_context
from sklearn.base import BaseEstimator
from sklearn.utils._array_api import (
_ArrayAPIWrapper,
_asarray_with_order,
_atol_for_type,
_average,
_convert_to_numpy,
_estimator_with_converted_arrays,
_is_numpy_namespace,
_nanmax,
_nanmin,
_NumPyAPIWrapper,
_ravel,
device,
get_namespace,
indexing_dtype,
supported_float_dtypes,
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._testing import (
_array_api_for_tests,
skip_if_array_api_compat_not_configured,
)
from sklearn.utils.fixes import _IS_32BIT
@pytest.mark.parametrize("X", [numpy.asarray([1, 2, 3]), [1, 2, 3]])
def test_get_namespace_ndarray_default(X):
"""Check that get_namespace returns NumPy wrapper"""
xp_out, is_array_api_compliant = get_namespace(X)
assert isinstance(xp_out, _NumPyAPIWrapper)
assert not is_array_api_compliant
def test_get_namespace_ndarray_creation_device():
"""Check expected behavior with device and creation functions."""
X = numpy.asarray([1, 2, 3])
xp_out, _ = get_namespace(X)
full_array = xp_out.full(10, fill_value=2.0, device="cpu")
assert_allclose(full_array, [2.0] * 10)
with pytest.raises(ValueError, match="Unsupported device"):
xp_out.zeros(10, device="cuda")
@skip_if_array_api_compat_not_configured
def test_get_namespace_ndarray_with_dispatch():
"""Test get_namespace on NumPy ndarrays."""
array_api_compat = pytest.importorskip("array_api_compat")
X_np = numpy.asarray([[1, 2, 3]])
with config_context(array_api_dispatch=True):
xp_out, is_array_api_compliant = get_namespace(X_np)
assert is_array_api_compliant
assert xp_out is array_api_compat.numpy
@skip_if_array_api_compat_not_configured
def test_get_namespace_array_api():
"""Test get_namespace for ArrayAPI arrays."""
xp = pytest.importorskip("array_api_strict")
X_np = numpy.asarray([[1, 2, 3]])
X_xp = xp.asarray(X_np)
with config_context(array_api_dispatch=True):
xp_out, is_array_api_compliant = get_namespace(X_xp)
assert is_array_api_compliant
with pytest.raises(TypeError):
xp_out, is_array_api_compliant = get_namespace(X_xp, X_np)
class _AdjustableNameAPITestWrapper(_ArrayAPIWrapper):
"""API wrapper that has an adjustable name. Used for testing."""
def __init__(self, array_namespace, name):
super().__init__(array_namespace=array_namespace)
self.__name__ = name
def test_array_api_wrapper_astype():
"""Test _ArrayAPIWrapper for ArrayAPIs that is not NumPy."""
array_api_strict = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(array_api_strict, "array_api_strict")
xp = _ArrayAPIWrapper(xp_)
X = xp.asarray(([[1, 2, 3], [3, 4, 5]]), dtype=xp.float64)
X_converted = xp.astype(X, xp.float32)
assert X_converted.dtype == xp.float32
X_converted = xp.asarray(X, dtype=xp.float32)
assert X_converted.dtype == xp.float32
@pytest.mark.parametrize("array_api", ["numpy", "array_api_strict"])
def test_asarray_with_order(array_api):
"""Test _asarray_with_order passes along order for NumPy arrays."""
xp = pytest.importorskip(array_api)
X = xp.asarray([1.2, 3.4, 5.1])
X_new = _asarray_with_order(X, order="F", xp=xp)
X_new_np = numpy.asarray(X_new)
assert X_new_np.flags["F_CONTIGUOUS"]
def test_asarray_with_order_ignored():
"""Test _asarray_with_order ignores order for Generic ArrayAPI."""
xp = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(xp, "array_api_strict")
X = numpy.asarray([[1.2, 3.4, 5.1], [3.4, 5.5, 1.2]], order="C")
X = xp_.asarray(X)
X_new = _asarray_with_order(X, order="F", xp=xp_)
X_new_np = numpy.asarray(X_new)
assert X_new_np.flags["C_CONTIGUOUS"]
assert not X_new_np.flags["F_CONTIGUOUS"]
@pytest.mark.parametrize(
"array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
)
@pytest.mark.parametrize(
"weights, axis, normalize, expected",
[
# normalize = True
(None, None, True, 3.5),
(None, 0, True, [2.5, 3.5, 4.5]),
(None, 1, True, [2, 5]),
([True, False], 0, True, [1, 2, 3]), # boolean weights
([True, True, False], 1, True, [1.5, 4.5]), # boolean weights
([0.4, 0.1], 0, True, [1.6, 2.6, 3.6]),
([0.4, 0.2, 0.2], 1, True, [1.75, 4.75]),
([1, 2], 0, True, [3, 4, 5]),
([1, 1, 2], 1, True, [2.25, 5.25]),
([[1, 2, 3], [1, 2, 3]], 0, True, [2.5, 3.5, 4.5]),
([[1, 2, 1], [2, 2, 2]], 1, True, [2, 5]),
# normalize = False
(None, None, False, 21),
(None, 0, False, [5, 7, 9]),
(None, 1, False, [6, 15]),
([True, False], 0, False, [1, 2, 3]), # boolean weights
([True, True, False], 1, False, [3, 9]), # boolean weights
([0.4, 0.1], 0, False, [0.8, 1.3, 1.8]),
([0.4, 0.2, 0.2], 1, False, [1.4, 3.8]),
([1, 2], 0, False, [9, 12, 15]),
([1, 1, 2], 1, False, [9, 21]),
([[1, 2, 3], [1, 2, 3]], 0, False, [5, 14, 27]),
([[1, 2, 1], [2, 2, 2]], 1, False, [8, 30]),
],
)
def test_average(
array_namespace, device, dtype_name, weights, axis, normalize, expected
):
xp = _array_api_for_tests(array_namespace, device)
array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
array_in = xp.asarray(array_in, device=device)
if weights is not None:
weights = numpy.asarray(weights, dtype=dtype_name)
weights = xp.asarray(weights, device=device)
with config_context(array_api_dispatch=True):
result = _average(array_in, axis=axis, weights=weights, normalize=normalize)
assert getattr(array_in, "device", None) == getattr(result, "device", None)
result = _convert_to_numpy(result, xp)
assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
@pytest.mark.parametrize(
"array_namespace, device, dtype_name",
yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
)
def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
xp = _array_api_for_tests(array_namespace, device)
array_in = numpy.asarray([2, 0], dtype=dtype_name) + 1j * numpy.asarray(
[4, 3], dtype=dtype_name
)
complex_type_name = array_in.dtype.name
if not hasattr(xp, complex_type_name):
# This is the case for cupy as of March 2024 for instance.
pytest.skip(f"{array_namespace} does not support {complex_type_name}")
array_in = xp.asarray(array_in, device=device)
err_msg = "Complex floating point values are not supported by average."
with (
config_context(array_api_dispatch=True),
pytest.raises(NotImplementedError, match=err_msg),
):
_average(array_in)
@pytest.mark.parametrize(
"array_namespace, device, dtype_name",
yield_namespace_device_dtype_combinations(include_numpy_namespaces=True),
)
@pytest.mark.parametrize(
"axis, weights, error, error_msg",
(
(
None,
[1, 2],
TypeError,
"Axis must be specified",
),
(
0,
[[1, 2]],
TypeError,
"1D weights expected",
),
(
0,
[1, 2, 3, 4],
ValueError,
"Length of weights",
),
(0, [-1, 1], ZeroDivisionError, "Weights sum to zero, can't be normalized"),
),
)
def test_average_raises_with_invalid_parameters(
array_namespace, device, dtype_name, axis, weights, error, error_msg
):
xp = _array_api_for_tests(array_namespace, device)
array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
array_in = xp.asarray(array_in, device=device)
weights = numpy.asarray(weights, dtype=dtype_name)
weights = xp.asarray(weights, device=device)
with config_context(array_api_dispatch=True), pytest.raises(error, match=error_msg):
_average(array_in, axis=axis, weights=weights)
def test_device_raises_if_no_input():
err_msg = re.escape(
"At least one input array expected after filtering with remove_none=True, "
"remove_types=[str]. Got none. Original types: []."
)
with pytest.raises(ValueError, match=err_msg):
device()
err_msg = re.escape(
"At least one input array expected after filtering with remove_none=True, "
"remove_types=[str]. Got none. Original types: [NoneType, str]."
)
with pytest.raises(ValueError, match=err_msg):
device(None, "name")
def test_device_inspection():
class Device:
def __init__(self, name):
self.name = name
def __eq__(self, device):
return self.name == device.name
def __hash__(self):
raise TypeError("Device object is not hashable")
def __str__(self):
return self.name
class Array:
def __init__(self, device_name):
self.device = Device(device_name)
# Sanity check: ensure our Device mock class is non-hashable, to
# accurately account for non-hashable device objects in some array
# libraries, because of which the `device` inspection function shouldn't
# make use of hash lookup tables (in particular, not use `set`)
with pytest.raises(TypeError):
hash(Array("device").device)
# Test raise if on different devices
err_msg = "Input arrays use different devices: cpu, mygpu"
with pytest.raises(ValueError, match=err_msg):
device(Array("cpu"), Array("mygpu"))
# Test expected value is returned otherwise
array1 = Array("device")
array2 = Array("device")
assert array1.device == device(array1)
assert array1.device == device(array1, array2)
assert array1.device == device(array1, array1, array2)
# TODO: add cupy and cupy.array_api to the list of libraries once the
# the following upstream issue has been fixed:
# https://github.com/cupy/cupy/issues/8180
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"])
@pytest.mark.parametrize(
"X,reduction,expected",
[
([1, 2, numpy.nan], _nanmin, 1),
([1, -2, -numpy.nan], _nanmin, -2),
([numpy.inf, numpy.inf], _nanmin, numpy.inf),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmin, axis=0),
[1.0, 2.0, 3.0],
),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmin, axis=1),
[1.0, numpy.nan, 4.0],
),
([1, 2, numpy.nan], _nanmax, 2),
([1, -2, -numpy.nan], _nanmax, 1),
([-numpy.inf, -numpy.inf], _nanmax, -numpy.inf),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmax, axis=0),
[4.0, 5.0, 6.0],
),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmax, axis=1),
[3.0, numpy.nan, 6.0],
),
],
)
def test_nan_reductions(library, X, reduction, expected):
"""Check NaN reductions like _nanmin and _nanmax"""
xp = pytest.importorskip(library)
with config_context(array_api_dispatch=True):
result = reduction(xp.asarray(X))
result = _convert_to_numpy(result, xp)
assert_allclose(result, expected)
@pytest.mark.parametrize(
"namespace, _device, _dtype", yield_namespace_device_dtype_combinations()
)
def test_ravel(namespace, _device, _dtype):
xp = _array_api_for_tests(namespace, _device)
array = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
array_xp = xp.asarray(array, device=_device)
with config_context(array_api_dispatch=True):
result = _ravel(array_xp)
result = _convert_to_numpy(result, xp)
expected = numpy.ravel(array, order="C")
assert_allclose(expected, result)
if _is_numpy_namespace(xp):
assert numpy.asarray(result).flags["C_CONTIGUOUS"]
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["cupy", "torch", "cupy.array_api"])
def test_convert_to_numpy_gpu(library): # pragma: nocover
"""Check convert_to_numpy for GPU backed libraries."""
xp = pytest.importorskip(library)
if library == "torch":
if not xp.backends.cuda.is_built():
pytest.skip("test requires cuda")
X_gpu = xp.asarray([1.0, 2.0, 3.0], device="cuda")
else:
X_gpu = xp.asarray([1.0, 2.0, 3.0])
X_cpu = _convert_to_numpy(X_gpu, xp=xp)
expected_output = numpy.asarray([1.0, 2.0, 3.0])
assert_allclose(X_cpu, expected_output)
def test_convert_to_numpy_cpu():
"""Check convert_to_numpy for PyTorch CPU arrays."""
torch = pytest.importorskip("torch")
X_torch = torch.asarray([1.0, 2.0, 3.0], device="cpu")
X_cpu = _convert_to_numpy(X_torch, xp=torch)
expected_output = numpy.asarray([1.0, 2.0, 3.0])
assert_allclose(X_cpu, expected_output)
class SimpleEstimator(BaseEstimator):
def fit(self, X, y=None):
self.X_ = X
self.n_features_ = X.shape[0]
return self
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize(
"array_namespace, converter",
[
("torch", lambda array: array.cpu().numpy()),
("array_api_strict", lambda array: numpy.asarray(array)),
("cupy.array_api", lambda array: array._array.get()),
],
)
def test_convert_estimator_to_ndarray(array_namespace, converter):
"""Convert estimator attributes to ndarray."""
xp = pytest.importorskip(array_namespace)
X = xp.asarray([[1.3, 4.5]])
est = SimpleEstimator().fit(X)
new_est = _estimator_with_converted_arrays(est, converter)
assert isinstance(new_est.X_, numpy.ndarray)
@skip_if_array_api_compat_not_configured
def test_convert_estimator_to_array_api():
"""Convert estimator attributes to ArrayAPI arrays."""
xp = pytest.importorskip("array_api_strict")
X_np = numpy.asarray([[1.3, 4.5]])
est = SimpleEstimator().fit(X_np)
new_est = _estimator_with_converted_arrays(est, lambda array: xp.asarray(array))
assert hasattr(new_est.X_, "__array_namespace__")
def test_reshape_behavior():
"""Check reshape behavior with copy and is strict with non-tuple shape."""
xp = _NumPyAPIWrapper()
X = xp.asarray([[1, 2, 3], [3, 4, 5]])
X_no_copy = xp.reshape(X, (-1,), copy=False)
assert X_no_copy.base is X
X_copy = xp.reshape(X, (6, 1), copy=True)
assert X_copy.base is not X.base
with pytest.raises(TypeError, match="shape must be a tuple"):
xp.reshape(X, -1)
@pytest.mark.parametrize("wrapper", [_ArrayAPIWrapper, _NumPyAPIWrapper])
def test_get_namespace_array_api_isdtype(wrapper):
"""Test isdtype implementation from _ArrayAPIWrapper and _NumPyAPIWrapper."""
if wrapper == _ArrayAPIWrapper:
xp_ = pytest.importorskip("array_api_strict")
xp = _ArrayAPIWrapper(xp_)
else:
xp = _NumPyAPIWrapper()
assert xp.isdtype(xp.float32, xp.float32)
assert xp.isdtype(xp.float32, "real floating")
assert xp.isdtype(xp.float64, "real floating")
assert not xp.isdtype(xp.int32, "real floating")
for dtype in supported_float_dtypes(xp):
assert xp.isdtype(dtype, "real floating")
assert xp.isdtype(xp.bool, "bool")
assert not xp.isdtype(xp.float32, "bool")
assert xp.isdtype(xp.int16, "signed integer")
assert not xp.isdtype(xp.uint32, "signed integer")
assert xp.isdtype(xp.uint16, "unsigned integer")
assert not xp.isdtype(xp.int64, "unsigned integer")
assert xp.isdtype(xp.int64, "numeric")
assert xp.isdtype(xp.float32, "numeric")
assert xp.isdtype(xp.uint32, "numeric")
assert not xp.isdtype(xp.float32, "complex floating")
if wrapper == _NumPyAPIWrapper:
assert not xp.isdtype(xp.int8, "complex floating")
assert xp.isdtype(xp.complex64, "complex floating")
assert xp.isdtype(xp.complex128, "complex floating")
with pytest.raises(ValueError, match="Unrecognized data type"):
assert xp.isdtype(xp.int16, "unknown")
@pytest.mark.parametrize(
"namespace, _device, _dtype", yield_namespace_device_dtype_combinations()
)
def test_indexing_dtype(namespace, _device, _dtype):
xp = _array_api_for_tests(namespace, _device)
if _IS_32BIT:
assert indexing_dtype(xp) == xp.int32
else:
assert indexing_dtype(xp) == xp.int64
@@ -0,0 +1,40 @@
import numpy as np
import pytest
from sklearn.utils._testing import assert_allclose
from sklearn.utils.arrayfuncs import _all_with_any_reduction_axis_1, min_pos
def test_min_pos():
# Check that min_pos returns a positive value and that it's consistent
# between float and double
X = np.random.RandomState(0).randn(100)
min_double = min_pos(X)
min_float = min_pos(X.astype(np.float32))
assert_allclose(min_double, min_float)
assert min_double >= 0
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_min_pos_no_positive(dtype):
# Check that the return value of min_pos is the maximum representable
# value of the input dtype when all input elements are <= 0 (#19328)
X = np.full(100, -1.0).astype(dtype, copy=False)
assert min_pos(X) == np.finfo(dtype).max
@pytest.mark.parametrize(
"dtype", [np.int16, np.int32, np.int64, np.float32, np.float64]
)
@pytest.mark.parametrize("value", [0, 1.5, -1])
def test_all_with_any_reduction_axis_1(dtype, value):
# Check that return value is False when there is no row equal to `value`
X = np.arange(12, dtype=dtype).reshape(3, 4)
assert not _all_with_any_reduction_axis_1(X, value=value)
# Make a row equal to `value`
X[1, :] = value
assert _all_with_any_reduction_axis_1(X, value=value)
@@ -0,0 +1,32 @@
import warnings
import numpy as np
import pytest
from sklearn.utils import Bunch
def test_bunch_attribute_deprecation():
"""Check that bunch raises deprecation message with `__getattr__`."""
bunch = Bunch()
values = np.asarray([1, 2, 3])
msg = (
"Key: 'values', is deprecated in 1.3 and will be "
"removed in 1.5. Please use 'grid_values' instead"
)
bunch._set_deprecated(
values, new_key="grid_values", deprecated_key="values", warning_message=msg
)
with warnings.catch_warnings():
# Does not warn for "grid_values"
warnings.simplefilter("error")
v = bunch["grid_values"]
assert v is values
with pytest.warns(FutureWarning, match=msg):
# Warns for "values"
v = bunch["values"]
assert v is values
@@ -0,0 +1,73 @@
import warnings
from itertools import chain
import pytest
from sklearn import config_context
from sklearn.utils._chunking import gen_even_slices, get_chunk_n_rows
from sklearn.utils._testing import assert_array_equal
def test_gen_even_slices():
# check that gen_even_slices contains all samples
some_range = range(10)
joined_range = list(chain(*[some_range[slice] for slice in gen_even_slices(10, 3)]))
assert_array_equal(some_range, joined_range)
@pytest.mark.parametrize(
("row_bytes", "max_n_rows", "working_memory", "expected"),
[
(1024, None, 1, 1024),
(1024, None, 0.99999999, 1023),
(1023, None, 1, 1025),
(1025, None, 1, 1023),
(1024, None, 2, 2048),
(1024, 7, 1, 7),
(1024 * 1024, None, 1, 1),
],
)
def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected):
with warnings.catch_warnings():
warnings.simplefilter("error", UserWarning)
actual = get_chunk_n_rows(
row_bytes=row_bytes,
max_n_rows=max_n_rows,
working_memory=working_memory,
)
assert actual == expected
assert type(actual) is type(expected)
with config_context(working_memory=working_memory):
with warnings.catch_warnings():
warnings.simplefilter("error", UserWarning)
actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows)
assert actual == expected
assert type(actual) is type(expected)
def test_get_chunk_n_rows_warns():
"""Check that warning is raised when working_memory is too low."""
row_bytes = 1024 * 1024 + 1
max_n_rows = None
working_memory = 1
expected = 1
warn_msg = (
"Could not adhere to working_memory config. Currently 1MiB, 2MiB required."
)
with pytest.warns(UserWarning, match=warn_msg):
actual = get_chunk_n_rows(
row_bytes=row_bytes,
max_n_rows=max_n_rows,
working_memory=working_memory,
)
assert actual == expected
assert type(actual) is type(expected)
with config_context(working_memory=working_memory):
with pytest.warns(UserWarning, match=warn_msg):
actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows)
assert actual == expected
assert type(actual) is type(expected)
@@ -0,0 +1,316 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
from sklearn.utils.fixes import CSC_CONTAINERS
def test_compute_class_weight():
# Test (and demo) compute_class_weight.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
cw = compute_class_weight("balanced", classes=classes, y=y)
# total effect of samples is preserved
class_counts = np.bincount(y)[2:]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert cw[0] < cw[1] < cw[2]
@pytest.mark.parametrize(
"y_type, class_weight, classes, err_msg",
[
(
"numeric",
"balanced",
np.arange(4),
"classes should have valid labels that are in y",
),
# Non-regression for https://github.com/scikit-learn/scikit-learn/issues/8312
(
"numeric",
{"label_not_present": 1.0},
np.arange(4),
r"The classes, \[0, 1, 2, 3\], are not in class_weight",
),
(
"numeric",
"balanced",
np.arange(2),
"classes should include all valid labels",
),
(
"numeric",
{0: 1.0, 1: 2.0},
np.arange(2),
"classes should include all valid labels",
),
(
"string",
{"dogs": 3, "cat": 2},
np.array(["dog", "cat"]),
r"The classes, \['dog'\], are not in class_weight",
),
],
)
def test_compute_class_weight_not_present(y_type, class_weight, classes, err_msg):
# Raise error when y does not contain all class labels
y = (
np.asarray([0, 0, 0, 1, 1, 2])
if y_type == "numeric"
else np.asarray(["dog", "cat", "dog"])
)
print(y)
with pytest.raises(ValueError, match=err_msg):
compute_class_weight(class_weight, classes=classes, y=y)
def test_compute_class_weight_dict():
classes = np.arange(3)
class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
y = np.asarray([0, 0, 1, 2])
cw = compute_class_weight(class_weights, classes=classes, y=y)
# When the user specifies class weights, compute_class_weights should just
# return them.
assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)
# When a class weight is specified that isn't in classes, the weight is ignored
class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
cw = compute_class_weight(class_weights, classes=classes, y=y)
assert_allclose([1.0, 2.0, 3.0], cw)
class_weights = {-1: 5.0, 0: 4.0, 1: 2.0, 2: 3.0}
cw = compute_class_weight(class_weights, classes=classes, y=y)
assert_allclose([4.0, 2.0, 3.0], cw)
def test_compute_class_weight_invariance():
# Test that results with class_weight="balanced" is invariant wrt
# class imbalance if the number of samples is identical.
# The test uses a balanced two class dataset with 100 datapoints.
# It creates three versions, one where class 1 is duplicated
# resulting in 150 points of class 1 and 50 of class 0,
# one where there are 50 points in class 1 and 150 in class 0,
# and one where there are 100 points of each class (this one is balanced
# again).
# With balancing class weights, all three should give the same model.
X, y = make_blobs(centers=2, random_state=0)
# create dataset where class 1 is duplicated twice
X_1 = np.vstack([X] + [X[y == 1]] * 2)
y_1 = np.hstack([y] + [y[y == 1]] * 2)
# create dataset where class 0 is duplicated twice
X_0 = np.vstack([X] + [X[y == 0]] * 2)
y_0 = np.hstack([y] + [y[y == 0]] * 2)
# duplicate everything
X_ = np.vstack([X] * 2)
y_ = np.hstack([y] * 2)
# results should be identical
logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
assert_array_almost_equal(logreg.coef_, logreg0.coef_)
def test_compute_class_weight_balanced_negative():
# Test compute_class_weight when labels are negative
# Test with balanced class labels.
classes = np.array([-2, -1, 0])
y = np.asarray([-1, -1, 0, 0, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
assert_array_almost_equal(cw, np.array([1.0, 1.0, 1.0]))
# Test with unbalanced class labels.
y = np.asarray([-1, 0, 0, -2, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
class_counts = np.bincount(y + 2)
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2.0 / 3, 2.0, 1.0])
def test_compute_class_weight_balanced_unordered():
# Test compute_class_weight when classes are unordered
classes = np.array([1, 0, 3])
y = np.asarray([1, 0, 0, 3, 3, 3])
cw = compute_class_weight("balanced", classes=classes, y=y)
class_counts = np.bincount(y)[classes]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2.0, 1.0, 2.0 / 3])
def test_compute_class_weight_default():
# Test for the case where no weight is given for a present class.
# Current behaviour is to assign the unweighted classes a weight of 1.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
classes_len = len(classes)
# Test for non specified weights
cw = compute_class_weight(None, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, np.ones(3))
# Tests for partly specified weights
cw = compute_class_weight({2: 1.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1.0, 1.0])
cw = compute_class_weight({2: 1.5, 4: 0.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1.0, 0.5])
def test_compute_sample_weight():
# Test (and demo) compute_sample_weight.
# Test with balanced classes
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with user-defined weights
sample_weight = compute_sample_weight({1: 2, 2: 1}, y)
assert_array_almost_equal(sample_weight, [2.0, 2.0, 2.0, 1.0, 1.0, 1.0])
# Test with column vector of balanced classes
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with unbalanced classes
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y)
expected_balanced = np.array(
[0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 2.3333]
)
assert_array_almost_equal(sample_weight, expected_balanced, decimal=4)
# Test with `None` weights
sample_weight = compute_sample_weight(None, y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with multi-output of balanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with multi-output with user-defined weights
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y)
assert_array_almost_equal(sample_weight, [2.0, 2.0, 2.0, 2.0, 2.0, 2.0])
# Test with multi-output of unbalanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, expected_balanced**2, decimal=3)
def test_compute_sample_weight_with_subsample():
# Test compute_sample_weight with subsamples specified.
# Test with balanced classes and all samples present
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with column vector of balanced classes and all samples present
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with a subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(4))
assert_array_almost_equal(sample_weight, [2.0 / 3, 2.0 / 3, 2.0 / 3, 2.0, 2.0, 2.0])
# Test with a bootstrap subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=[0, 1, 1, 2, 2, 3])
expected_balanced = np.asarray([0.6, 0.6, 0.6, 3.0, 3.0, 3.0])
assert_array_almost_equal(sample_weight, expected_balanced)
# Test with a bootstrap subsample for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y, indices=[0, 1, 1, 2, 2, 3])
assert_array_almost_equal(sample_weight, expected_balanced**2)
# Test with a missing class
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0])
# Test with a missing class for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0])
@pytest.mark.parametrize(
"y_type, class_weight, indices, err_msg",
[
(
"single-output",
{1: 2, 2: 1},
range(4),
"The only valid class_weight for subsampling is 'balanced'.",
),
(
"multi-output",
{1: 2, 2: 1},
None,
"For multi-output, class_weight should be a list of dicts, or the string",
),
(
"multi-output",
[{1: 2, 2: 1}],
None,
r"Got 1 element\(s\) while having 2 outputs",
),
],
)
def test_compute_sample_weight_errors(y_type, class_weight, indices, err_msg):
# Test compute_sample_weight raises errors expected.
# Invalid preset string
y_single_output = np.asarray([1, 1, 1, 2, 2, 2])
y_multi_output = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
y = y_single_output if y_type == "single-output" else y_multi_output
with pytest.raises(ValueError, match=err_msg):
compute_sample_weight(class_weight, y, indices=indices)
def test_compute_sample_weight_more_than_32():
# Non-regression smoke test for #12146
y = np.arange(50) # more than 32 distinct classes
indices = np.arange(50) # use subsampling
weight = compute_sample_weight("balanced", y, indices=indices)
assert_array_almost_equal(weight, np.ones(y.shape[0]))
def test_class_weight_does_not_contains_more_classes():
"""Check that class_weight can contain more labels than in y.
Non-regression test for #22413
"""
tree = DecisionTreeClassifier(class_weight={0: 1, 1: 10, 2: 20})
# Does not raise
tree.fit([[0, 0, 1], [1, 0, 1], [1, 2, 0]], [0, 0, 1])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_compute_sample_weight_sparse(csc_container):
"""Check that we can compute weight for sparse `y`."""
y = csc_container(np.asarray([[0], [1], [1]]))
sample_weight = compute_sample_weight("balanced", y)
assert_allclose(sample_weight, [1.5, 0.75, 0.75])
@@ -0,0 +1,234 @@
import numpy as np
import pytest
from sklearn.utils._cython_blas import (
ColMajor,
NoTrans,
RowMajor,
Trans,
_asum_memview,
_axpy_memview,
_copy_memview,
_dot_memview,
_gemm_memview,
_gemv_memview,
_ger_memview,
_nrm2_memview,
_rot_memview,
_rotg_memview,
_scal_memview,
)
from sklearn.utils._testing import assert_allclose
def _numpy_to_cython(dtype):
cython = pytest.importorskip("cython")
if dtype == np.float32:
return cython.float
elif dtype == np.float64:
return cython.double
RTOL = {np.float32: 1e-6, np.float64: 1e-12}
ORDER = {RowMajor: "C", ColMajor: "F"}
def _no_op(x):
return x
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_dot(dtype):
dot = _dot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
expected = x.dot(y)
actual = dot(x, y)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_asum(dtype):
asum = _asum_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.abs(x).sum()
actual = asum(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_axpy(dtype):
axpy = _axpy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x + y
axpy(alpha, x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_nrm2(dtype):
nrm2 = _nrm2_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.linalg.norm(x)
actual = nrm2(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_copy(dtype):
copy = _copy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = np.empty_like(x)
expected = x.copy()
copy(x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_scal(dtype):
scal = _scal_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x
scal(alpha, x)
assert_allclose(x, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rotg(dtype):
rotg = _rotg_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
a = dtype(rng.randn())
b = dtype(rng.randn())
c, s = 0.0, 0.0
def expected_rotg(a, b):
roe = a if abs(a) > abs(b) else b
if a == 0 and b == 0:
c, s, r, z = (1, 0, 0, 0)
else:
r = np.sqrt(a**2 + b**2) * (1 if roe >= 0 else -1)
c, s = a / r, b / r
z = s if roe == a else (1 if c == 0 else 1 / c)
return r, z, c, s
expected = expected_rotg(a, b)
actual = rotg(a, b, c, s)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rot(dtype):
rot = _rot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
c = dtype(rng.randn())
s = dtype(rng.randn())
expected_x = c * x + s * y
expected_y = c * y - s * x
rot(x, y, c, s)
assert_allclose(x, expected_x)
assert_allclose(y, expected_y)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
"opA, transA", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_gemv(dtype, opA, transA, order):
gemv = _gemv_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(
opA(rng.random_sample((20, 10)).astype(dtype, copy=False)), order=ORDER[order]
)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(x) + beta * y
gemv(transA, alpha, A, x, beta, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_ger(dtype, order):
ger = _ger_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
A = np.asarray(
rng.random_sample((10, 20)).astype(dtype, copy=False), order=ORDER[order]
)
alpha = 2.5
expected = alpha * np.outer(x, y) + A
ger(alpha, x, y, A)
assert_allclose(A, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
"opB, transB", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize(
"opA, transA", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_gemm(dtype, opA, transA, opB, transB, order):
gemm = _gemm_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(
opA(rng.random_sample((30, 10)).astype(dtype, copy=False)), order=ORDER[order]
)
B = np.asarray(
opB(rng.random_sample((10, 20)).astype(dtype, copy=False)), order=ORDER[order]
)
C = np.asarray(
rng.random_sample((30, 20)).astype(dtype, copy=False), order=ORDER[order]
)
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(opB(B)) + beta * C
gemm(transA, transB, alpha, A, B, beta, C)
assert_allclose(C, expected, rtol=RTOL[dtype])
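# A minimal sketch of why ORDER maps RowMajor to "C" and ColMajor to "F": the
# two layouts hold the same values and differ only in which axis is contiguous
# in memory, which is what the BLAS wrappers above need to know (the helper
# name below is illustrative).
def _order_layout_sketch():
    rng = np.random.RandomState(0)
    A = rng.random_sample((3, 2))
    A_c = np.asarray(A, order=ORDER[RowMajor])  # "C": rows are contiguous
    A_f = np.asarray(A, order=ORDER[ColMajor])  # "F": columns are contiguous
    assert A_c.flags["C_CONTIGUOUS"] and A_f.flags["F_CONTIGUOUS"]
    assert np.array_equal(A_c, A_f)  # same values, different strides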
@@ -0,0 +1,22 @@
import pathlib
import pytest
import sklearn
def test_files_generated_by_templates_are_git_ignored():
"""Check the consistence of the files generated from template files."""
gitignore_file = pathlib.Path(sklearn.__file__).parent.parent / ".gitignore"
if not gitignore_file.exists():
pytest.skip("Tests are not run from the source folder")
base_dir = pathlib.Path(sklearn.__file__).parent
ignored_files = gitignore_file.read_text().split("\n")
ignored_files = [pathlib.Path(line) for line in ignored_files]
for filename in base_dir.glob("**/*.tp"):
filename = filename.relative_to(base_dir.parent)
# From "path/to/template.p??.tp" to "path/to/template.p??"
filename_wo_tempita_suffix = filename.with_suffix("")
assert filename_wo_tempita_suffix in ignored_files
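# A minimal sketch of the suffix-stripping step used above: `with_suffix("")`
# drops only the trailing ".tp", leaving the name of the generated file that is
# expected to be listed in .gitignore (the path below is illustrative).
def _template_suffix_sketch():
    template = pathlib.Path("sklearn/utils/_example.pyx.tp")
    generated = template.with_suffix("")
    assert generated == pathlib.Path("sklearn/utils/_example.pyx")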
@@ -0,0 +1,88 @@
# Authors: Raghav RV <rvraghav93@gmail.com>
# License: BSD 3 clause
import pickle
import pytest
from sklearn.utils.deprecation import _is_deprecated, deprecated
@deprecated("qwerty")
class MockClass1:
pass
class MockClass2:
@deprecated("mockclass2_method")
def method(self):
pass
@deprecated("n_features_ is deprecated") # type: ignore
@property
def n_features_(self):
"""Number of input features."""
return 10
class MockClass3:
@deprecated()
def __init__(self):
pass
class MockClass4:
pass
class MockClass5(MockClass1):
"""Inherit from deprecated class but does not call super().__init__."""
def __init__(self, a):
self.a = a
@deprecated("a message")
class MockClass6:
"""A deprecated class that overrides __new__."""
def __new__(cls, *args, **kwargs):
assert len(args) > 0
return super().__new__(cls)
@deprecated()
def mock_function():
return 10
def test_deprecated():
with pytest.warns(FutureWarning, match="qwerty"):
MockClass1()
with pytest.warns(FutureWarning, match="mockclass2_method"):
MockClass2().method()
with pytest.warns(FutureWarning, match="deprecated"):
MockClass3()
with pytest.warns(FutureWarning, match="qwerty"):
MockClass5(42)
with pytest.warns(FutureWarning, match="a message"):
MockClass6(42)
with pytest.warns(FutureWarning, match="deprecated"):
val = mock_function()
assert val == 10
def test_is_deprecated():
# Test that the _is_deprecated helper identifies wrapping via deprecated.
# NOTE: it works only for class methods and functions.
assert _is_deprecated(MockClass1.__new__)
assert _is_deprecated(MockClass2().method)
assert _is_deprecated(MockClass3.__init__)
assert not _is_deprecated(MockClass4.__init__)
assert _is_deprecated(MockClass5.__new__)
assert _is_deprecated(mock_function)
def test_pickle():
pickle.loads(pickle.dumps(mock_function))
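# A minimal usage sketch of the `deprecated` decorator exercised above: calling
# a wrapped function emits a FutureWarning containing the given message, while
# the return value is unchanged (the names below are illustrative).
def _deprecated_usage_sketch():
    import warnings

    @deprecated("use new_helper instead")
    def old_helper():
        return 10

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert old_helper() == 10
    assert any("use new_helper instead" in str(w.message) for w in caught)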
@@ -0,0 +1,274 @@
import pickle
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.utils._encode import _check_unknown, _encode, _get_counts, _unique
@pytest.mark.parametrize(
"values, expected",
[
(np.array([2, 1, 3, 1, 3], dtype="int64"), np.array([1, 2, 3], dtype="int64")),
(
np.array([2, 1, np.nan, 1, np.nan], dtype="float32"),
np.array([1, 2, np.nan], dtype="float32"),
),
(
np.array(["b", "a", "c", "a", "c"], dtype=object),
np.array(["a", "b", "c"], dtype=object),
),
(
np.array(["b", "a", None, "a", None], dtype=object),
np.array(["a", "b", None], dtype=object),
),
(np.array(["b", "a", "c", "a", "c"]), np.array(["a", "b", "c"])),
],
ids=["int64", "float32-nan", "object", "object-None", "str"],
)
def test_encode_util(values, expected):
uniques = _unique(values)
assert_array_equal(uniques, expected)
result, encoded = _unique(values, return_inverse=True)
assert_array_equal(result, expected)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
result, counts = _unique(values, return_counts=True)
assert_array_equal(result, expected)
assert_array_equal(counts, np.array([2, 1, 2]))
result, encoded, counts = _unique(values, return_inverse=True, return_counts=True)
assert_array_equal(result, expected)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
assert_array_equal(counts, np.array([2, 1, 2]))
def test_encode_with_check_unknown():
# test for the check_unknown parameter of _encode()
uniques = np.array([1, 2, 3])
values = np.array([1, 2, 3, 4])
# Default is True, raise error
with pytest.raises(ValueError, match="y contains previously unseen labels"):
_encode(values, uniques=uniques, check_unknown=True)
# don't raise an error if False
_encode(values, uniques=uniques, check_unknown=False)
# the parameter is ignored for object dtype: unknown values always raise
uniques = np.array(["a", "b", "c"], dtype=object)
values = np.array(["a", "b", "c", "d"], dtype=object)
with pytest.raises(ValueError, match="y contains previously unseen labels"):
_encode(values, uniques=uniques, check_unknown=False)
def _assert_check_unknown(values, uniques, expected_diff, expected_mask):
diff = _check_unknown(values, uniques)
assert_array_equal(diff, expected_diff)
diff, valid_mask = _check_unknown(values, uniques, return_mask=True)
assert_array_equal(diff, expected_diff)
assert_array_equal(valid_mask, expected_mask)
@pytest.mark.parametrize(
"values, uniques, expected_diff, expected_mask",
[
(np.array([1, 2, 3, 4]), np.array([1, 2, 3]), [4], [True, True, True, False]),
(np.array([2, 1, 4, 5]), np.array([2, 5, 1]), [4], [True, True, False, True]),
(np.array([2, 1, np.nan]), np.array([2, 5, 1]), [np.nan], [True, True, False]),
(
np.array([2, 1, 4, np.nan]),
np.array([2, 5, 1, np.nan]),
[4],
[True, True, False, True],
),
(
np.array([2, 1, 4, np.nan]),
np.array([2, 5, 1]),
[4, np.nan],
[True, True, False, False],
),
(
np.array([2, 1, 4, 5]),
np.array([2, 5, 1, np.nan]),
[4],
[True, True, False, True],
),
(
np.array(["a", "b", "c", "d"], dtype=object),
np.array(["a", "b", "c"], dtype=object),
np.array(["d"], dtype=object),
[True, True, True, False],
),
(
np.array(["d", "c", "a", "b"], dtype=object),
np.array(["a", "c", "b"], dtype=object),
np.array(["d"], dtype=object),
[False, True, True, True],
),
(
np.array(["a", "b", "c", "d"]),
np.array(["a", "b", "c"]),
np.array(["d"]),
[True, True, True, False],
),
(
np.array(["d", "c", "a", "b"]),
np.array(["a", "c", "b"]),
np.array(["d"]),
[False, True, True, True],
),
],
)
def test_check_unknown(values, uniques, expected_diff, expected_mask):
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
@pytest.mark.parametrize("missing_value", [None, np.nan, float("nan")])
@pytest.mark.parametrize("pickle_uniques", [True, False])
def test_check_unknown_missing_values(missing_value, pickle_uniques):
# check for check_unknown with missing values with object dtypes
values = np.array(["d", "c", "a", "b", missing_value], dtype=object)
uniques = np.array(["c", "a", "b", missing_value], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = ["d"]
expected_mask = [False, True, True, True, True]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
values = np.array(["d", "c", "a", "b", missing_value], dtype=object)
uniques = np.array(["c", "a", "b"], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = ["d", missing_value]
expected_mask = [False, True, True, True, False]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
values = np.array(["a", missing_value], dtype=object)
uniques = np.array(["a", "b", "z"], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = [missing_value]
expected_mask = [True, False]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
@pytest.mark.parametrize("missing_value", [np.nan, None, float("nan")])
@pytest.mark.parametrize("pickle_uniques", [True, False])
def test_unique_util_missing_values_objects(missing_value, pickle_uniques):
# check for _unique and _encode with missing values with object dtypes
values = np.array(["a", "c", "c", missing_value, "b"], dtype=object)
expected_uniques = np.array(["a", "b", "c", missing_value], dtype=object)
uniques = _unique(values)
if missing_value is None:
assert_array_equal(uniques, expected_uniques)
else: # missing_value == np.nan
assert_array_equal(uniques[:-1], expected_uniques[:-1])
assert np.isnan(uniques[-1])
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, np.array([0, 2, 2, 3, 1]))
def test_unique_util_missing_values_numeric():
# Check missing values in numerical values
values = np.array([3, 1, np.nan, 5, 3, np.nan], dtype=float)
expected_uniques = np.array([1, 3, 5, np.nan], dtype=float)
expected_inverse = np.array([1, 0, 3, 2, 1, 3])
uniques = _unique(values)
assert_array_equal(uniques, expected_uniques)
uniques, inverse = _unique(values, return_inverse=True)
assert_array_equal(uniques, expected_uniques)
assert_array_equal(inverse, expected_inverse)
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, expected_inverse)
def test_unique_util_with_all_missing_values():
# test for all types of missing values for object dtype
values = np.array([np.nan, "a", "c", "c", None, float("nan"), None], dtype=object)
uniques = _unique(values)
assert_array_equal(uniques[:-1], ["a", "c", None])
# last value is nan
assert np.isnan(uniques[-1])
expected_inverse = [3, 0, 1, 1, 2, 3, 2]
_, inverse = _unique(values, return_inverse=True)
assert_array_equal(inverse, expected_inverse)
def test_check_unknown_with_both_missing_values():
# test for both types of missing values for object dtype
values = np.array([np.nan, "a", "c", "c", None, np.nan, None], dtype=object)
diff = _check_unknown(values, known_values=np.array(["a", "c"], dtype=object))
assert diff[0] is None
assert np.isnan(diff[1])
diff, valid_mask = _check_unknown(
values, known_values=np.array(["a", "c"], dtype=object), return_mask=True
)
assert diff[0] is None
assert np.isnan(diff[1])
assert_array_equal(valid_mask, [False, True, True, True, False, False, False])
@pytest.mark.parametrize(
"values, uniques, expected_counts",
[
(np.array([1] * 10 + [2] * 4 + [3] * 15), np.array([1, 2, 3]), [10, 4, 15]),
(
np.array([1] * 10 + [2] * 4 + [3] * 15),
np.array([1, 2, 3, 5]),
[10, 4, 15, 0],
),
(
np.array([np.nan] * 10 + [2] * 4 + [3] * 15),
np.array([2, 3, np.nan]),
[4, 15, 10],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["a", "b", "c"],
[16, 4, 20],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["c", "b", "a"],
[20, 4, 16],
),
(
np.array([np.nan] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["c", np.nan, "a"],
[20, 4, 16],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["a", "b", "c", "e"],
[16, 4, 20, 0],
),
],
)
def test_get_counts(values, uniques, expected_counts):
counts = _get_counts(values, uniques)
assert_array_equal(counts, expected_counts)
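# A minimal round-trip sketch of the private helpers exercised above, under the
# behaviour the tests assert: `_unique` returns sorted uniques, `_encode` maps
# each value to its position in `uniques`, and `_get_counts` counts occurrences
# per unique value.
def _encode_round_trip_sketch():
    values = np.array(["b", "a", "c", "a", "c"], dtype=object)
    uniques = _unique(values)                 # ["a", "b", "c"]
    codes = _encode(values, uniques=uniques)  # [1, 0, 2, 0, 2]
    counts = _get_counts(values, uniques)     # [2, 1, 2]
    assert_array_equal(uniques[codes], values)  # decoding recovers the input
    assert counts.sum() == len(values)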
@@ -0,0 +1,518 @@
import html
import locale
import re
from contextlib import closing
from io import StringIO
from unittest.mock import patch
import pytest
from sklearn import config_context
from sklearn.base import BaseEstimator
from sklearn.cluster import AgglomerativeClustering, Birch
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.ensemble import StackingClassifier, StackingRegressor, VotingClassifier
from sklearn.feature_selection import SelectPercentile
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.impute import SimpleImputer
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multiclass import OneVsOneClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._estimator_html_repr import (
_get_css_style,
_get_visual_block,
_HTMLDocumentationLinkMixin,
_write_label_html,
estimator_html_repr,
)
from sklearn.utils.fixes import parse_version
@pytest.mark.parametrize("checked", [True, False])
def test_write_label_html(checked):
# Test checking logic and labeling
name = "LogisticRegression"
tool_tip = "hello-world"
with closing(StringIO()) as out:
_write_label_html(out, name, tool_tip, checked=checked)
html_label = out.getvalue()
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"LogisticRegression"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_label)
assert html_label.startswith('<div class="sk-label-container">')
assert "<pre>hello-world</pre>" in html_label
if checked:
assert "checked>" in html_label
@pytest.mark.parametrize("est", ["passthrough", "drop", None])
def test_get_visual_block_single_str_none(est):
# Test estimators that are represented by strings
est_html_info = _get_visual_block(est)
assert est_html_info.kind == "single"
assert est_html_info.estimators == est
assert est_html_info.names == str(est)
assert est_html_info.name_details == str(est)
def test_get_visual_block_single_estimator():
est = LogisticRegression(C=10.0)
est_html_info = _get_visual_block(est)
assert est_html_info.kind == "single"
assert est_html_info.estimators == est
assert est_html_info.names == est.__class__.__name__
assert est_html_info.name_details == str(est)
def test_get_visual_block_pipeline():
pipe = Pipeline(
[
("imputer", SimpleImputer()),
("do_nothing", "passthrough"),
("do_nothing_more", None),
("classifier", LogisticRegression()),
]
)
est_html_info = _get_visual_block(pipe)
assert est_html_info.kind == "serial"
assert est_html_info.estimators == tuple(step[1] for step in pipe.steps)
assert est_html_info.names == [
"imputer: SimpleImputer",
"do_nothing: passthrough",
"do_nothing_more: passthrough",
"classifier: LogisticRegression",
]
assert est_html_info.name_details == [str(est) for _, est in pipe.steps]
def test_get_visual_block_feature_union():
f_union = FeatureUnion([("pca", PCA()), ("svd", TruncatedSVD())])
est_html_info = _get_visual_block(f_union)
assert est_html_info.kind == "parallel"
assert est_html_info.names == ("pca", "svd")
assert est_html_info.estimators == tuple(
trans[1] for trans in f_union.transformer_list
)
assert est_html_info.name_details == (None, None)
def test_get_visual_block_voting():
clf = VotingClassifier(
[("log_reg", LogisticRegression()), ("mlp", MLPClassifier())]
)
est_html_info = _get_visual_block(clf)
assert est_html_info.kind == "parallel"
assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators)
assert est_html_info.names == ("log_reg", "mlp")
assert est_html_info.name_details == (None, None)
def test_get_visual_block_column_transformer():
ct = ColumnTransformer(
[("pca", PCA(), ["num1", "num2"]), ("svd", TruncatedSVD, [0, 3])]
)
est_html_info = _get_visual_block(ct)
assert est_html_info.kind == "parallel"
assert est_html_info.estimators == tuple(trans[1] for trans in ct.transformers)
assert est_html_info.names == ("pca", "svd")
assert est_html_info.name_details == (["num1", "num2"], [0, 3])
def test_estimator_html_repr_pipeline():
num_trans = Pipeline(
steps=[("pass", "passthrough"), ("imputer", SimpleImputer(strategy="median"))]
)
cat_trans = Pipeline(
steps=[
("imputer", SimpleImputer(strategy="constant", missing_values="empty")),
("one-hot", OneHotEncoder(drop="first")),
]
)
preprocess = ColumnTransformer(
[
("num", num_trans, ["a", "b", "c", "d", "e"]),
("cat", cat_trans, [0, 1, 2, 3]),
]
)
feat_u = FeatureUnion(
[
("pca", PCA(n_components=1)),
(
"tsvd",
Pipeline(
[
("first", TruncatedSVD(n_components=3)),
("select", SelectPercentile()),
]
),
),
]
)
clf = VotingClassifier(
[
("lr", LogisticRegression(solver="lbfgs", random_state=1)),
("mlp", MLPClassifier(alpha=0.001)),
]
)
pipe = Pipeline(
[("preprocessor", preprocess), ("feat_u", feat_u), ("classifier", clf)]
)
html_output = estimator_html_repr(pipe)
# the top-level estimator shows its repr with the changed parameters
assert html.escape(str(pipe)) in html_output
for _, est in pipe.steps:
assert (
'<div class="sk-toggleable__content "><pre>' + html.escape(str(est))
) in html_output
# low level estimators do not show changes
with config_context(print_changed_only=True):
assert html.escape(str(num_trans["pass"])) in html_output
assert "passthrough</label>" in html_output
assert html.escape(str(num_trans["imputer"])) in html_output
for _, _, cols in preprocess.transformers:
assert f"<pre>{html.escape(str(cols))}</pre>" in html_output
# feature union
for name, _ in feat_u.transformer_list:
assert f"<label>{html.escape(name)}</label>" in html_output
pca = feat_u.transformer_list[0][1]
assert f"<pre>{html.escape(str(pca))}</pre>" in html_output
tsvd = feat_u.transformer_list[1][1]
first = tsvd["first"]
select = tsvd["select"]
assert f"<pre>{html.escape(str(first))}</pre>" in html_output
assert f"<pre>{html.escape(str(select))}</pre>" in html_output
# voting classifier
for name, est in clf.estimators:
assert f"<label>{html.escape(name)}</label>" in html_output
assert f"<pre>{html.escape(str(est))}</pre>" in html_output
# verify that prefers-color-scheme is implemented
assert "prefers-color-scheme" in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
def test_stacking_classifier(final_estimator):
estimators = [
("mlp", MLPClassifier(alpha=0.001)),
("tree", DecisionTreeClassifier()),
]
clf = StackingClassifier(estimators=estimators, final_estimator=final_estimator)
html_output = estimator_html_repr(clf)
assert html.escape(str(clf)) in html_output
# If final_estimator's default changes from LogisticRegression
# this should be updated
if final_estimator is None:
assert "LogisticRegression(" in html_output
else:
assert final_estimator.__class__.__name__ in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVR()])
def test_stacking_regressor(final_estimator):
reg = StackingRegressor(
estimators=[("svr", LinearSVR())], final_estimator=final_estimator
)
html_output = estimator_html_repr(reg)
assert html.escape(str(reg.estimators[0][0])) in html_output
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"&nbsp;LinearSVR"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
if final_estimator is None:
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"&nbsp;RidgeCV"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
else:
assert html.escape(final_estimator.__class__.__name__) in html_output
def test_birch_duck_typing_meta():
# Test duck typing meta estimators with Birch
birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
html_output = estimator_html_repr(birch)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{html.escape(str(birch.n_clusters))}" in html_output
assert "AgglomerativeClustering</label>" in html_output
# outer estimator contains all changes
assert f"<pre>{html.escape(str(birch))}" in html_output
def test_ovo_classifier_duck_typing_meta():
# Test duck typing metaestimators with OVO
ovo = OneVsOneClassifier(LinearSVC(penalty="l1"))
html_output = estimator_html_repr(ovo)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{html.escape(str(ovo.estimator))}" in html_output
# regex to match the start of the tag
p = (
r'<label for="sk-estimator-id-[0-9]*" '
r'class="sk-toggleable__label sk-toggleable__label-arrow ">&nbsp;LinearSVC'
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
# outer estimator
assert f"<pre>{html.escape(str(ovo))}" in html_output
def test_duck_typing_nested_estimator():
# Test duck typing metaestimators with random search
kernel_ridge = KernelRidge(kernel=ExpSineSquared())
param_distributions = {"alpha": [1, 2]}
kernel_ridge_tuned = RandomizedSearchCV(
kernel_ridge,
param_distributions=param_distributions,
)
html_output = estimator_html_repr(kernel_ridge_tuned)
assert "estimator: KernelRidge</label>" in html_output
@pytest.mark.parametrize("print_changed_only", [True, False])
def test_one_estimator_print_change_only(print_changed_only):
pca = PCA(n_components=10)
with config_context(print_changed_only=print_changed_only):
pca_repr = html.escape(str(pca))
html_output = estimator_html_repr(pca)
assert pca_repr in html_output
def test_fallback_exists():
"""Check that repr fallback is in the HTML."""
pca = PCA(n_components=10)
html_output = estimator_html_repr(pca)
assert (
f'<div class="sk-text-repr-fallback"><pre>{html.escape(str(pca))}'
in html_output
)
def test_show_arrow_pipeline():
"""Show arrow in pipeline for top level in pipeline"""
pipe = Pipeline([("scale", StandardScaler()), ("log_Reg", LogisticRegression())])
html_output = estimator_html_repr(pipe)
assert (
'class="sk-toggleable__label sk-toggleable__label-arrow ">&nbsp;&nbsp;Pipeline'
in html_output
)
def test_invalid_parameters_in_stacking():
"""Invalidate stacking configuration uses default repr.
Non-regression test for #24009.
"""
stacker = StackingClassifier(estimators=[])
html_output = estimator_html_repr(stacker)
assert html.escape(str(stacker)) in html_output
def test_estimator_get_params_return_cls():
"""Check HTML repr works where a value in get_params is a class."""
class MyEstimator:
def get_params(self, deep=False):
return {"inner_cls": LogisticRegression}
est = MyEstimator()
assert "MyEstimator" in estimator_html_repr(est)
def test_estimator_html_repr_unfitted_vs_fitted():
"""Check that we have the information that the estimator is fitted or not in the
HTML representation.
"""
class MyEstimator(BaseEstimator):
def fit(self, X, y):
self.fitted_ = True
return self
X, y = load_iris(return_X_y=True)
estimator = MyEstimator()
assert "<span>Not fitted</span>" in estimator_html_repr(estimator)
estimator.fit(X, y)
assert "<span>Fitted</span>" in estimator_html_repr(estimator)
@pytest.mark.parametrize(
"estimator",
[
LogisticRegression(),
make_pipeline(StandardScaler(), LogisticRegression()),
make_pipeline(
make_column_transformer((StandardScaler(), slice(0, 3))),
LogisticRegression(),
),
],
)
def test_estimator_html_repr_fitted_icon(estimator):
"""Check that we are showing the fitted status icon only once."""
pattern = '<span class="sk-estimator-doc-link ">i<span>Not fitted</span></span>'
assert estimator_html_repr(estimator).count(pattern) == 1
X, y = load_iris(return_X_y=True)
estimator.fit(X, y)
pattern = '<span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span>'
assert estimator_html_repr(estimator).count(pattern) == 1
@pytest.mark.parametrize("mock_version", ["1.3.0.dev0", "1.3.0"])
def test_html_documentation_link_mixin_sklearn(mock_version):
"""Check the behaviour of the `_HTMLDocumentationLinkMixin` class for scikit-learn
default.
"""
# mock the `__version__` where the mixin is located
with patch("sklearn.utils._estimator_html_repr.__version__", mock_version):
mixin = _HTMLDocumentationLinkMixin()
assert mixin._doc_link_module == "sklearn"
sklearn_version = parse_version(mock_version)
# we need to parse the version manually to make sure that this test also passes
# on release branches, not only on `main` (whose version is a "dev" version).
if sklearn_version.dev is None:
version = f"{sklearn_version.major}.{sklearn_version.minor}"
else:
version = "dev"
assert (
mixin._doc_link_template
== f"https://scikit-learn.org/{version}/modules/generated/"
"{estimator_module}.{estimator_name}.html"
)
assert (
mixin._get_doc_link()
== f"https://scikit-learn.org/{version}/modules/generated/"
"sklearn.utils._HTMLDocumentationLinkMixin.html"
)
@pytest.mark.parametrize(
"module_path,expected_module",
[
("prefix.mymodule", "prefix.mymodule"),
("prefix._mymodule", "prefix"),
("prefix.mypackage._mymodule", "prefix.mypackage"),
("prefix.mypackage._mymodule.submodule", "prefix.mypackage"),
("prefix.mypackage.mymodule.submodule", "prefix.mypackage.mymodule.submodule"),
],
)
def test_html_documentation_link_mixin_get_doc_link(module_path, expected_module):
"""Check the behaviour of the `_get_doc_link` with various parameter."""
class FooBar(_HTMLDocumentationLinkMixin):
pass
FooBar.__module__ = module_path
est = FooBar()
# once `_doc_link_module` matches, the estimator's module and name are inferred to fill the template
est._doc_link_module = "prefix"
est._doc_link_template = (
"https://website.com/{estimator_module}.{estimator_name}.html"
)
assert est._get_doc_link() == f"https://website.com/{expected_module}.FooBar.html"
def test_html_documentation_link_mixin_get_doc_link_out_of_library():
"""Check the behaviour of the `_get_doc_link` with various parameter."""
mixin = _HTMLDocumentationLinkMixin()
# if the `_doc_link_module` does not refer to the root module of the estimator
# (here the mixin), then we should return an empty string.
mixin._doc_link_module = "xxx"
assert mixin._get_doc_link() == ""
def test_html_documentation_link_mixin_doc_link_url_param_generator():
mixin = _HTMLDocumentationLinkMixin()
# we can bypass the generation by providing our own callable
mixin._doc_link_template = (
"https://website.com/{my_own_variable}.{another_variable}.html"
)
def url_param_generator(estimator):
return {
"my_own_variable": "value_1",
"another_variable": "value_2",
}
mixin._doc_link_url_param_generator = url_param_generator
assert mixin._get_doc_link() == "https://website.com/value_1.value_2.html"
@pytest.fixture
def set_non_utf8_locale():
"""Pytest fixture to set non utf-8 locale during the test.
The locale is set to the original one after the test has run.
"""
try:
locale.setlocale(locale.LC_CTYPE, "C")
except locale.Error:
pytest.skip("'C' locale is not available on this OS")
yield
# Resets the locale to the original one. Python calls setlocale(LC_CTYPE, "")
# at startup according to
# https://docs.python.org/3/library/locale.html#background-details-hints-tips-and-caveats.
# This assumes that no other locale changes have been made. For some reason,
# on some platforms, trying to restore locale with something like
# locale.setlocale(locale.LC_CTYPE, locale.getlocale()) raises a
# locale.Error: unsupported locale setting
locale.setlocale(locale.LC_CTYPE, "")
def test_non_utf8_locale(set_non_utf8_locale):
"""Checks that utf8 encoding is used when reading the CSS file.
Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/27725
"""
_get_css_style()
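# A minimal usage sketch of `estimator_html_repr`, matching what the tests above
# rely on: it returns an HTML fragment (a string) that embeds the estimator's
# text repr as a fallback, and the string can simply be written to a file for
# viewing in a browser (the target path below is illustrative).
def _estimator_html_repr_usage_sketch(target=None):
    est = make_pipeline(StandardScaler(), LogisticRegression())
    html_output = estimator_html_repr(est)
    assert html.escape(str(est)) in html_output
    if target is not None:  # e.g. a pathlib.Path such as tmp_path / "est.html"
        target.write_text(html_output, encoding="utf-8")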
File diff suppressed because it is too large
@@ -0,0 +1,47 @@
"""Test fast_dict."""
import numpy as np
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.utils._fast_dict import IntFloatDict, argmin
def test_int_float_dict():
rng = np.random.RandomState(0)
keys = np.unique(rng.randint(100, size=10).astype(np.intp))
values = rng.rand(len(keys))
d = IntFloatDict(keys, values)
for key, value in zip(keys, values):
assert d[key] == value
assert len(d) == len(keys)
d.append(120, 3.0)
assert d[120] == 3.0
assert len(d) == len(keys) + 1
for i in range(2000):
d.append(i + 1000, 4.0)
assert d[1100] == 4.0
def test_int_float_dict_argmin():
# Test the argmin implementation on the IntFloatDict
keys = np.arange(100, dtype=np.intp)
values = np.arange(100, dtype=np.float64)
d = IntFloatDict(keys, values)
assert argmin(d) == (0, 0)
def test_to_arrays():
# Test that an IntFloatDict is converted into arrays
# of keys and values correctly
keys_in = np.array([1, 2, 3], dtype=np.intp)
values_in = np.array([4, 5, 6], dtype=np.float64)
d = IntFloatDict(keys_in, values_in)
keys_out, values_out = d.to_arrays()
assert keys_out.dtype == keys_in.dtype
assert values_in.dtype == values_out.dtype
assert_array_equal(keys_out, keys_in)
assert_allclose(values_out, values_in)
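# A minimal usage sketch of `IntFloatDict`, consistent with the tests above: it
# is built from parallel intp/float64 arrays, supports item access, append and
# len, converts back to arrays, and `argmin` returns the (key, value) pair with
# the smallest value.
def _int_float_dict_usage_sketch():
    keys = np.array([10, 20, 30], dtype=np.intp)
    values = np.array([0.5, 0.25, 0.75], dtype=np.float64)
    d = IntFloatDict(keys, values)
    d.append(40, 0.125)
    assert len(d) == 4 and d[40] == 0.125
    assert argmin(d) == (40, 0.125)
    keys_out, values_out = d.to_arrays()
    assert len(keys_out) == len(values_out) == 4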
@@ -0,0 +1,162 @@
# Authors: Gael Varoquaux <gael.varoquaux@normalesup.org>
# Justin Vincent
# Lars Buitinck
# License: BSD 3 clause
import numpy as np
import pytest
from sklearn.utils._testing import assert_array_equal
from sklearn.utils.fixes import _object_dtype_isnan, _smallest_admissible_index_dtype
@pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1]))
def test_object_dtype_isnan(dtype, val):
X = np.array([[val, np.nan], [np.nan, val]], dtype=dtype)
expected_mask = np.array([[False, True], [True, False]])
mask = _object_dtype_isnan(X)
assert_array_equal(mask, expected_mask)
@pytest.mark.parametrize(
"params, expected_dtype",
[
({}, np.int32), # default behaviour
({"maxval": np.iinfo(np.int32).max}, np.int32),
({"maxval": np.iinfo(np.int32).max + 1}, np.int64),
],
)
def test_smallest_admissible_index_dtype_max_val(params, expected_dtype):
"""Check the behaviour of `smallest_admissible_index_dtype` depending only on the
`max_val` parameter.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, expected_dtype",
[
# Arrays dtype is int64 and thus should not be downcasted to int32 without
# checking the content or providing maxval.
({"arrays": np.array([1, 2], dtype=np.int64)}, np.int64),
# One of the array is int64 and should not be downcasted to int32
# for the same reasons.
(
{
"arrays": (
np.array([1, 2], dtype=np.int32),
np.array([1, 2], dtype=np.int64),
)
},
np.int64,
),
# Both arrays are already int32: we can just keep this dtype.
(
{
"arrays": (
np.array([1, 2], dtype=np.int32),
np.array([1, 2], dtype=np.int32),
)
},
np.int32,
),
# Arrays should be upcasted to at least int32 precision.
({"arrays": np.array([1, 2], dtype=np.int8)}, np.int32),
# Check that `maxval` takes precedence over the arrays and thus upcast to
# int64.
(
{
"arrays": np.array([1, 2], dtype=np.int32),
"maxval": np.iinfo(np.int32).max + 1,
},
np.int64,
),
],
)
def test_smallest_admissible_index_dtype_without_checking_contents(
params, expected_dtype
):
"""Check the behaviour of `smallest_admissible_index_dtype` using the passed
arrays but without checking the contents of the arrays.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, expected_dtype",
[
# empty arrays should always be converted to int32 indices
(
{
"arrays": (np.array([], dtype=np.int64), np.array([], dtype=np.int64)),
"check_contents": True,
},
np.int32,
),
# arrays respecting np.iinfo(np.int32).min < x < np.iinfo(np.int32).max should
# be converted to int32,
(
{"arrays": np.array([1], dtype=np.int64), "check_contents": True},
np.int32,
),
# otherwise, it should be converted to int64. We need to create a uint32
# array to accommodate a value > np.iinfo(np.int32).max
(
{
"arrays": np.array([np.iinfo(np.int32).max + 1], dtype=np.uint32),
"check_contents": True,
},
np.int64,
),
# maxval should take precedence over the arrays contents and thus upcast to
# int64.
(
{
"arrays": np.array([1], dtype=np.int32),
"check_contents": True,
"maxval": np.iinfo(np.int32).max + 1,
},
np.int64,
),
# when maxval is small, but check_contents is True and the contents
# require np.int64, we still require np.int64 indexing in the end.
(
{
"arrays": np.array([np.iinfo(np.int32).max + 1], dtype=np.uint32),
"check_contents": True,
"maxval": 1,
},
np.int64,
),
],
)
def test_smallest_admissible_index_dtype_by_checking_contents(params, expected_dtype):
"""Check the behaviour of `smallest_admissible_index_dtype` using the dtype of the
arrays but as well the contents.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, err_type, err_msg",
[
(
{"maxval": np.iinfo(np.int64).max + 1},
ValueError,
"is to large to be represented as np.int64",
),
(
{"arrays": np.array([1, 2], dtype=np.float64)},
ValueError,
"Array dtype float64 is not supported",
),
({"arrays": [1, 2]}, TypeError, "Arrays should be of type np.ndarray"),
],
)
def test_smallest_admissible_index_dtype_error(params, err_type, err_msg):
"""Check that we raise the proper error message."""
with pytest.raises(err_type, match=err_msg):
_smallest_admissible_index_dtype(**params)
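# A minimal sketch of how `_smallest_admissible_index_dtype` is typically used,
# consistent with the cases above: given the index arrays of a sparse matrix it
# returns np.int32 when both the contents and `maxval` fit, and np.int64
# otherwise.
def _index_dtype_usage_sketch():
    from scipy.sparse import csr_matrix

    X = csr_matrix(np.eye(3))
    dtype = _smallest_admissible_index_dtype(
        arrays=(X.indices, X.indptr), maxval=max(X.shape), check_contents=True
    )
    assert dtype == np.int32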
@@ -0,0 +1,80 @@
import numpy as np
import pytest
from scipy.sparse.csgraph import connected_components
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.neighbors import kneighbors_graph
from sklearn.utils.graph import _fix_connected_components
def test_fix_connected_components():
# Test that _fix_connected_components reduces the number of components to 1.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
assert n_connected_components > 1
graph = _fix_connected_components(X, graph, n_connected_components, labels)
n_connected_components, labels = connected_components(graph)
assert n_connected_components == 1
def test_fix_connected_components_precomputed():
# Test that _fix_connected_components accepts precomputed distance matrix.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
assert n_connected_components > 1
distances = pairwise_distances(X)
graph = _fix_connected_components(
distances, graph, n_connected_components, labels, metric="precomputed"
)
n_connected_components, labels = connected_components(graph)
assert n_connected_components == 1
# but it does not work with precomputed neighbors graph
with pytest.raises(RuntimeError, match="does not work with a sparse"):
_fix_connected_components(
graph, graph, n_connected_components, labels, metric="precomputed"
)
def test_fix_connected_components_wrong_mode():
# Test that an error is raised if the mode string is incorrect.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
with pytest.raises(ValueError, match="Unknown mode"):
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="foo"
)
def test_fix_connected_components_connectivity_mode():
# Test that the connectivity mode fills new connections with ones.
X = np.array([0, 1, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=1, mode="connectivity")
n_connected_components, labels = connected_components(graph)
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="connectivity"
)
assert np.all(graph.data == 1)
def test_fix_connected_components_distance_mode():
# Test that the distance mode does not fill new connections with ones.
X = np.array([0, 1, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=1, mode="distance")
assert np.all(graph.data == 1)
n_connected_components, labels = connected_components(graph)
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="distance"
)
assert not np.all(graph.data == 1)
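# A minimal end-to-end sketch of the repair performed by _fix_connected_components,
# consistent with the tests above: a 1-nearest-neighbor graph over two well
# separated groups starts with two connected components, and the helper adds the
# missing edges so that a single component remains.
def _fix_connected_components_sketch():
    X = np.array([0, 1, 10, 11])[:, None]
    graph = kneighbors_graph(X, n_neighbors=1, mode="distance")
    n_components, labels = connected_components(graph)
    assert n_components == 2
    graph = _fix_connected_components(X, graph, n_components, labels, mode="distance")
    assert connected_components(graph)[0] == 1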
@@ -0,0 +1,594 @@
import warnings
from copy import copy
from unittest import SkipTest
import numpy as np
import pytest
import sklearn
from sklearn.externals._packaging.version import parse as parse_version
from sklearn.utils import _safe_indexing, resample, shuffle
from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
from sklearn.utils._indexing import (
_determine_key_type,
_get_column_indices,
_safe_assign,
)
from sklearn.utils._mocking import MockDataFrame
from sklearn.utils._testing import (
_array_api_for_tests,
_convert_container,
assert_allclose_dense_sparse,
assert_array_equal,
skip_if_array_api_compat_not_configured,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
# toy array
X_toy = np.arange(9).reshape((3, 3))
def test_polars_indexing():
"""Check _safe_indexing for polars as expected."""
pl = pytest.importorskip("polars", minversion="0.18.2")
df = pl.DataFrame(
{"a": [1, 2, 3, 4], "b": [4, 5, 6, 8], "c": [1, 4, 1, 10]}, orient="row"
)
from polars.testing import assert_frame_equal
str_keys = [["b"], ["a", "b"], ["b", "a", "c"], ["c"], ["a"]]
for key in str_keys:
out = _safe_indexing(df, key, axis=1)
assert_frame_equal(df[key], out)
bool_keys = [([True, False, True], ["a", "c"]), ([False, False, True], ["c"])]
for bool_key, str_key in bool_keys:
out = _safe_indexing(df, bool_key, axis=1)
assert_frame_equal(df[:, str_key], out)
int_keys = [([0, 1], ["a", "b"]), ([2], ["c"])]
for int_key, str_key in int_keys:
out = _safe_indexing(df, int_key, axis=1)
assert_frame_equal(df[:, str_key], out)
axis_0_keys = [[0, 1], [1, 3], [3, 2]]
for key in axis_0_keys:
out = _safe_indexing(df, key, axis=0)
assert_frame_equal(df[key], out)
@pytest.mark.parametrize(
"key, dtype",
[
(0, "int"),
("0", "str"),
(True, "bool"),
(np.bool_(True), "bool"),
([0, 1, 2], "int"),
(["0", "1", "2"], "str"),
((0, 1, 2), "int"),
(("0", "1", "2"), "str"),
(slice(None, None), None),
(slice(0, 2), "int"),
(np.array([0, 1, 2], dtype=np.int32), "int"),
(np.array([0, 1, 2], dtype=np.int64), "int"),
(np.array([0, 1, 2], dtype=np.uint8), "int"),
([True, False], "bool"),
((True, False), "bool"),
(np.array([True, False]), "bool"),
("col_0", "str"),
(["col_0", "col_1", "col_2"], "str"),
(("col_0", "col_1", "col_2"), "str"),
(slice("begin", "end"), "str"),
(np.array(["col_0", "col_1", "col_2"]), "str"),
(np.array(["col_0", "col_1", "col_2"], dtype=object), "str"),
],
)
def test_determine_key_type(key, dtype):
assert _determine_key_type(key) == dtype
def test_determine_key_type_error():
with pytest.raises(ValueError, match="No valid specification of the"):
_determine_key_type(1.0)
def test_determine_key_type_slice_error():
with pytest.raises(TypeError, match="Only array-like or scalar are"):
_determine_key_type(slice(0, 2, 1), accept_slice=False)
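# A minimal sketch of the key-type taxonomy asserted by the parametrized cases
# above: positional keys report "int", column-name keys report "str", boolean
# masks report "bool", and the unrestricted slice reports None.
def _determine_key_type_sketch():
    assert _determine_key_type([0, 2]) == "int"
    assert _determine_key_type(["col_0", "col_2"]) == "str"
    assert _determine_key_type(np.array([True, False, True])) == "bool"
    assert _determine_key_type(slice(None, None)) is None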
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize(
"array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
)
def test_determine_key_type_array_api(array_namespace, device, dtype_name):
xp = _array_api_for_tests(array_namespace, device)
with sklearn.config_context(array_api_dispatch=True):
int_array_key = xp.asarray([1, 2, 3])
assert _determine_key_type(int_array_key) == "int"
bool_array_key = xp.asarray([True, False, True])
assert _determine_key_type(bool_array_key) == "bool"
try:
complex_array_key = xp.asarray([1 + 1j, 2 + 2j, 3 + 3j])
except TypeError:
# Complex numbers are not supported by all Array API libraries.
complex_array_key = None
if complex_array_key is not None:
with pytest.raises(ValueError, match="No valid specification of the"):
_determine_key_type(complex_array_key)
@pytest.mark.parametrize(
"array_type", ["list", "array", "sparse", "dataframe", "polars"]
)
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
def test_safe_indexing_2d_container_axis_0(array_type, indices_type):
indices = [1, 2]
if indices_type == "slice" and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(
subset, _convert_container([[4, 5, 6], [7, 8, 9]], array_type)
)
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
def test_safe_indexing_1d_container(array_type, indices_type):
indices = [1, 2]
if indices_type == "slice" and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(subset, _convert_container([2, 3], array_type))
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
@pytest.mark.parametrize("indices", [[1, 2], ["col_1", "col_2"]])
def test_safe_indexing_2d_container_axis_1(array_type, indices_type, indices):
# validation of the indices
# we make a copy because indices is mutable and shared between tests
indices_converted = copy(indices)
if indices_type == "slice" and isinstance(indices[1], int):
indices_converted[1] += 1
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices_converted = _convert_container(indices_converted, indices_type)
if isinstance(indices[0], str) and array_type not in ("dataframe", "polars"):
err_msg = (
"Specifying the columns using strings is only supported for dataframes"
)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices_converted, axis=1)
else:
subset = _safe_indexing(array, indices_converted, axis=1)
assert_allclose_dense_sparse(
subset, _convert_container([[2, 3], [5, 6], [8, 9]], array_type)
)
@pytest.mark.parametrize("array_read_only", [True, False])
@pytest.mark.parametrize("indices_read_only", [True, False])
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["array", "series"])
@pytest.mark.parametrize(
"axis, expected_array", [(0, [[4, 5, 6], [7, 8, 9]]), (1, [[2, 3], [5, 6], [8, 9]])]
)
def test_safe_indexing_2d_read_only_axis_1(
array_read_only, indices_read_only, array_type, indices_type, axis, expected_array
):
array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
if array_read_only:
array.setflags(write=False)
array = _convert_container(array, array_type)
indices = np.array([1, 2])
if indices_read_only:
indices.setflags(write=False)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(subset, _convert_container(expected_array, array_type))
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
def test_safe_indexing_1d_container_mask(array_type, indices_type):
indices = [False] + [True] * 2 + [False] * 6
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(subset, _convert_container([2, 3], array_type))
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
@pytest.mark.parametrize(
"axis, expected_subset",
[(0, [[4, 5, 6], [7, 8, 9]]), (1, [[2, 3], [5, 6], [8, 9]])],
)
def test_safe_indexing_2d_mask(array_type, indices_type, axis, expected_subset):
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices = [False, True, True]
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(
subset, _convert_container(expected_subset, array_type)
)
@pytest.mark.parametrize(
"array_type, expected_output_type",
[
("list", "list"),
("array", "array"),
("sparse", "sparse"),
("dataframe", "series"),
("polars", "polars_series"),
],
)
def test_safe_indexing_2d_scalar_axis_0(array_type, expected_output_type):
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
expected_array = _convert_container([7, 8, 9], expected_output_type)
assert_allclose_dense_sparse(subset, expected_array)
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
def test_safe_indexing_1d_scalar(array_type):
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
assert subset == 3
@pytest.mark.parametrize(
"array_type, expected_output_type",
[
("array", "array"),
("sparse", "sparse"),
("dataframe", "series"),
("polars", "polars_series"),
],
)
@pytest.mark.parametrize("indices", [2, "col_2"])
def test_safe_indexing_2d_scalar_axis_1(array_type, expected_output_type, indices):
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
if isinstance(indices, str) and array_type not in ("dataframe", "polars"):
err_msg = (
"Specifying the columns using strings is only supported for dataframes"
)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices, axis=1)
else:
subset = _safe_indexing(array, indices, axis=1)
expected_output = [3, 6, 9]
if expected_output_type == "sparse":
# sparse matrices keep the 2D shape
expected_output = [[3], [6], [9]]
expected_array = _convert_container(expected_output, expected_output_type)
assert_allclose_dense_sparse(subset, expected_array)
@pytest.mark.parametrize("array_type", ["list", "array", "sparse"])
def test_safe_indexing_None_axis_0(array_type):
X = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
X_subset = _safe_indexing(X, None, axis=0)
assert_allclose_dense_sparse(X_subset, X)
def test_safe_indexing_pandas_no_matching_cols_error():
pd = pytest.importorskip("pandas")
err_msg = "No valid specification of the columns."
X = pd.DataFrame(X_toy)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X, [1.0], axis=1)
@pytest.mark.parametrize("axis", [None, 3])
def test_safe_indexing_error_axis(axis):
with pytest.raises(ValueError, match="'axis' should be either 0"):
_safe_indexing(X_toy, [0, 1], axis=axis)
@pytest.mark.parametrize("X_constructor", ["array", "series", "polars_series"])
def test_safe_indexing_1d_array_error(X_constructor):
# check that we are raising an error if the array-like passed is 1D and
# we try to index on the 2nd dimension
X = list(range(5))
if X_constructor == "array":
X_constructor = np.asarray(X)
elif X_constructor == "series":
pd = pytest.importorskip("pandas")
X_constructor = pd.Series(X)
elif X_constructor == "polars_series":
pl = pytest.importorskip("polars")
X_constructor = pl.Series(values=X)
err_msg = "'X' should be a 2D NumPy array, 2D sparse matrix or dataframe"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X_constructor, [0, 1], axis=1)
def test_safe_indexing_container_axis_0_unsupported_type():
indices = ["col_1", "col_2"]
array = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
err_msg = "String indexing is not supported with 'axis=0'"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices, axis=0)
def test_safe_indexing_pandas_no_settingwithcopy_warning():
# Using safe_indexing with an array-like indexer gives a copy of the
# DataFrame -> ensure it doesn't raise a warning if modified
pd = pytest.importorskip("pandas")
pd_version = parse_version(pd.__version__)
pd_base_version = parse_version(pd_version.base_version)
if pd_base_version >= parse_version("3"):
raise SkipTest("SettingWithCopyWarning has been removed in pandas 3.0.0.dev")
X = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
subset = _safe_indexing(X, [0, 1], axis=0)
if hasattr(pd.errors, "SettingWithCopyWarning"):
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
else:
# backward compatibility for pandas < 1.5
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
with warnings.catch_warnings():
warnings.simplefilter("error", SettingWithCopyWarning)
subset.iloc[0, 0] = 10
# The original dataframe is unaffected by the assignment on the subset:
assert X.iloc[0, 0] == 1
@pytest.mark.parametrize("indices", [0, [0, 1], slice(0, 2), np.array([0, 1])])
def test_safe_indexing_list_axis_1_unsupported(indices):
"""Check that we raise a ValueError when axis=1 with input as list."""
X = [[1, 2], [4, 5], [7, 8]]
err_msg = "axis=1 is not supported for lists"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X, indices, axis=1)
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"])
def test_safe_assign(array_type):
"""Check that `_safe_assign` works as expected."""
rng = np.random.RandomState(0)
X_array = rng.randn(10, 5)
row_indexer = [1, 2]
values = rng.randn(len(row_indexer), X_array.shape[1])
X = _convert_container(X_array, array_type)
_safe_assign(X, values, row_indexer=row_indexer)
assigned_portion = _safe_indexing(X, row_indexer, axis=0)
assert_allclose_dense_sparse(
assigned_portion, _convert_container(values, array_type)
)
column_indexer = [1, 2]
values = rng.randn(X_array.shape[0], len(column_indexer))
X = _convert_container(X_array, array_type)
_safe_assign(X, values, column_indexer=column_indexer)
assigned_portion = _safe_indexing(X, column_indexer, axis=1)
assert_allclose_dense_sparse(
assigned_portion, _convert_container(values, array_type)
)
row_indexer, column_indexer = None, None
values = rng.randn(*X.shape)
X = _convert_container(X_array, array_type)
_safe_assign(X, values, column_indexer=column_indexer)
assert_allclose_dense_sparse(X, _convert_container(values, array_type))
@pytest.mark.parametrize(
"key, err_msg",
[
(10, r"all features must be in \[0, 2\]"),
("whatever", "A given column is not a column of the dataframe"),
(object(), "No valid specification of the columns"),
],
)
def test_get_column_indices_error(key, err_msg):
pd = pytest.importorskip("pandas")
X_df = pd.DataFrame(X_toy, columns=["col_0", "col_1", "col_2"])
with pytest.raises(ValueError, match=err_msg):
_get_column_indices(X_df, key)
@pytest.mark.parametrize(
"key", [["col1"], ["col2"], ["col1", "col2"], ["col1", "col3"], ["col2", "col3"]]
)
def test_get_column_indices_pandas_nonunique_columns_error(key):
pd = pytest.importorskip("pandas")
toy = np.zeros((1, 5), dtype=int)
columns = ["col1", "col1", "col2", "col3", "col2"]
X = pd.DataFrame(toy, columns=columns)
err_msg = "Selected columns, {}, are not unique in dataframe".format(key)
with pytest.raises(ValueError) as exc_info:
_get_column_indices(X, key)
assert str(exc_info.value) == err_msg
def test_get_column_indices_interchange():
"""Check _get_column_indices for edge cases with the interchange"""
pd = pytest.importorskip("pandas", minversion="1.5")
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
# Hide the fact that this is a pandas dataframe to trigger the dataframe protocol
# code path.
class MockDataFrame:
def __init__(self, df):
self._df = df
def __getattr__(self, name):
return getattr(self._df, name)
df_mocked = MockDataFrame(df)
key_results = [
(slice(1, None), [1, 2]),
(slice(None, 2), [0, 1]),
(slice(1, 2), [1]),
(["b", "c"], [1, 2]),
(slice("a", "b"), [0, 1]),
(slice("a", None), [0, 1, 2]),
(slice(None, "a"), [0]),
(["c", "a"], [2, 0]),
([], []),
]
for key, result in key_results:
assert _get_column_indices(df_mocked, key) == result
msg = "A given column is not a column of the dataframe"
with pytest.raises(ValueError, match=msg):
_get_column_indices(df_mocked, ["not_a_column"])
msg = "key.step must be 1 or None"
with pytest.raises(NotImplementedError, match=msg):
_get_column_indices(df_mocked, slice("a", None, 2))
def test_resample():
# Border case not worth mentioning in doctests
assert resample() is None
# Check that invalid arguments yield ValueError
with pytest.raises(ValueError):
resample([0], [0, 1])
with pytest.raises(ValueError):
resample([0, 1], [0, 1], replace=False, n_samples=3)
# Issue:6581, n_samples can be more when replace is True (default).
assert len(resample([1, 2], n_samples=5)) == 5
def test_resample_stratified():
# Make sure resample can stratify
rng = np.random.RandomState(0)
n_samples = 100
p = 0.9
X = rng.normal(size=(n_samples, 1))
y = rng.binomial(1, p, size=n_samples)
_, y_not_stratified = resample(X, y, n_samples=10, random_state=0, stratify=None)
assert np.all(y_not_stratified == 1)
_, y_stratified = resample(X, y, n_samples=10, random_state=0, stratify=y)
assert not np.all(y_stratified == 1)
assert np.sum(y_stratified) == 9  # nine 1s and one 0
def test_resample_stratified_replace():
# Make sure stratified resampling supports the replace parameter
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=n_samples)
X_replace, _ = resample(
X, y, replace=True, n_samples=50, random_state=rng, stratify=y
)
X_no_replace, _ = resample(
X, y, replace=False, n_samples=50, random_state=rng, stratify=y
)
assert np.unique(X_replace).shape[0] < 50
assert np.unique(X_no_replace).shape[0] == 50
# make sure n_samples can be greater than X.shape[0] if we sample with
# replacement
X_replace, _ = resample(
X, y, replace=True, n_samples=1000, random_state=rng, stratify=y
)
assert X_replace.shape[0] == 1000
assert np.unique(X_replace).shape[0] == 100
def test_resample_stratify_2dy():
# Make sure y can be 2d when stratifying
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=(n_samples, 2))
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=y)
assert y.ndim == 2
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_resample_stratify_sparse_error(csr_container):
# stratify must be an ndarray, not sparse
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 2))
y = rng.randint(0, 2, size=n_samples)
stratify = csr_container(y.reshape(-1, 1))
with pytest.raises(TypeError, match="Sparse data was passed"):
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify)
def test_shuffle_on_ndim_equals_three():
def to_tuple(A): # to make the inner arrays hashable
return tuple(tuple(tuple(C) for C in B) for B in A)
A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # A.shape = (2,2,2)
S = set(to_tuple(A))
shuffle(A) # shouldn't raise a ValueError for dim = 3
assert set(to_tuple(A)) == S
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_shuffle_dont_convert_to_array(csc_container):
# Check that shuffle does not try to convert to numpy arrays with float
# dtype and lets any indexable data structure pass through.
a = ["a", "b", "c"]
b = np.array(["a", "b", "c"], dtype=object)
c = [1, 2, 3]
d = MockDataFrame(np.array([["a", 0], ["b", 1], ["c", 2]], dtype=object))
e = csc_container(np.arange(6).reshape(3, 2))
a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0)
assert a_s == ["c", "b", "a"]
assert type(a_s) == list # noqa: E721
assert_array_equal(b_s, ["c", "b", "a"])
assert b_s.dtype == object
assert c_s == [3, 2, 1]
assert type(c_s) == list # noqa: E721
assert_array_equal(d_s, np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object))
assert type(d_s) == MockDataFrame # noqa: E721
assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))
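# A minimal combined sketch of the public helpers exercised in this module,
# consistent with the behaviour asserted above: `_safe_indexing` selects rows or
# columns uniformly across containers, `shuffle` applies the same permutation to
# every container it is given, and `resample` draws a (bootstrap) sample.
def _indexing_helpers_sketch():
    X = np.arange(12).reshape(4, 3)
    y = np.array([0, 1, 0, 1])
    assert_array_equal(_safe_indexing(X, [0, 2], axis=0), X[[0, 2]])  # rows
    assert_array_equal(_safe_indexing(X, [1], axis=1), X[:, [1]])     # columns
    X_s, y_s = shuffle(X, y, random_state=0)                          # same permutation
    assert X_s.shape == X.shape and y_s.shape == y.shape
    assert len(resample(y, n_samples=8, random_state=0)) == 8         # with replacement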

Some files were not shown because too many files have changed in this diff