feat: initial commit - Phase 1 & 2 core features

This commit is contained in:
hiderfong
2026-04-22 17:07:33 +08:00
commit 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,520 @@
from functools import partial
import numpy as np
from numpy.testing import assert_array_equal
from sklearn.base import (
BaseEstimator,
ClassifierMixin,
MetaEstimatorMixin,
RegressorMixin,
TransformerMixin,
clone,
)
from sklearn.metrics._scorer import _Scorer, mean_squared_error
from sklearn.model_selection import BaseCrossValidator
from sklearn.model_selection._split import GroupsConsumerMixin
from sklearn.utils._metadata_requests import (
SIMPLE_METHODS,
)
from sklearn.utils.metadata_routing import (
MetadataRouter,
MethodMapping,
process_routing,
)
from sklearn.utils.multiclass import _check_partial_fit_first_call
def record_metadata(obj, method, record_default=True, **kwargs):
    """Utility function to store passed metadata to a method.

    If record_default is False, kwargs whose values are "default" are skipped.
    This is so that checks on keyword arguments whose default was not changed
    are skipped.
    """
    if not hasattr(obj, "_records"):
        obj._records = {}
    recorded = kwargs
    if not record_default:
        # Drop every kwarg left at the "default" string sentinel.
        recorded = {}
        for key, val in kwargs.items():
            if isinstance(val, str) and val == "default":
                continue
            recorded[key] = val
    obj._records[method] = recorded
def check_recorded_metadata(obj, method, split_params=tuple(), **kwargs):
    """Check whether the expected metadata is passed to the object's method.

    Parameters
    ----------
    obj : estimator object
        sub-estimator to check routed params for
    method : str
        sub-estimator's method where metadata is routed to
    split_params : tuple, default=empty
        specifies any parameters which are to be checked as being a subset
        of the original values
    **kwargs : dict
        passed metadata
    """
    records = getattr(obj, "_records", {}).get(method, {})
    # The exact same set of keys must have been recorded.
    assert set(kwargs.keys()) == set(
        records.keys()
    ), f"Expected {kwargs.keys()} vs {records.keys()}"
    for key, value in kwargs.items():
        recorded_value = records[key]
        if key in split_params and recorded_value is not None:
            # Split params only need to be a subset of the original values.
            assert np.isin(recorded_value, value).all()
        elif isinstance(recorded_value, np.ndarray):
            assert_array_equal(recorded_value, value)
        else:
            # Non-array values must be the very same object, not merely equal.
            assert recorded_value is value, f"Expected {recorded_value} vs {value}"
# Shorthand that skips keyword arguments left at their "default" sentinel.
record_metadata_not_default = partial(record_metadata, record_default=False)
def assert_request_is_empty(metadata_request, exclude=None):
    """Check if a metadata request dict is empty.

    One can exclude a method or a list of methods from the check using the
    ``exclude`` parameter. If metadata_request is a MetadataRouter, then
    ``exclude`` can be of the form ``{"object" : [method, ...]}``.
    """
    if isinstance(metadata_request, MetadataRouter):
        # Recurse into each sub-router with its own exclusion list.
        for name, route_mapping in metadata_request:
            sub_exclude = None
            if exclude is not None and name in exclude:
                sub_exclude = exclude[name]
            assert_request_is_empty(route_mapping.router, exclude=sub_exclude)
        return

    exclude = [] if exclude is None else exclude
    for method in SIMPLE_METHODS:
        if method in exclude:
            continue
        mmr = getattr(metadata_request, method)
        requested = [
            prop
            for prop, alias in mmr.requests.items()
            if alias is not None or isinstance(alias, str)
        ]
        assert not requested
def assert_request_equal(request, dictionary):
    """Assert that ``request`` holds exactly the requests in ``dictionary``."""
    for method, expected in dictionary.items():
        assert getattr(request, method).requests == expected
    # Every method not listed in the dictionary must have an empty request.
    for method in SIMPLE_METHODS:
        if method not in dictionary:
            assert not len(getattr(request, method).requests)
class _Registry(list):
# This list is used to get a reference to the sub-estimators, which are not
# necessarily stored on the metaestimator. We need to override __deepcopy__
# because the sub-estimators are probably cloned, which would result in a
# new copy of the list, but we need copy and deep copy both to return the
# same instance.
def __deepcopy__(self, memo):
return self
def __copy__(self):
return self
class ConsumingRegressor(RegressorMixin, BaseEstimator):
    """A regressor consuming metadata.

    Parameters
    ----------
    registry : list, default=None
        If a list, the estimator will append itself to the list in order to
        have a reference to the estimator later on. Since that reference is
        not required in all tests, registration can be skipped by leaving
        this value as None.
    """

    def __init__(self, registry=None):
        self.registry = registry

    def _register(self):
        # Keep a reference to this instance so tests can inspect it later.
        if self.registry is not None:
            self.registry.append(self)

    def partial_fit(self, X, y, sample_weight="default", metadata="default"):
        self._register()
        record_metadata_not_default(
            self, "partial_fit", sample_weight=sample_weight, metadata=metadata
        )
        return self

    def fit(self, X, y, sample_weight="default", metadata="default"):
        self._register()
        record_metadata_not_default(
            self, "fit", sample_weight=sample_weight, metadata=metadata
        )
        return self

    def predict(self, X, y=None, sample_weight="default", metadata="default"):
        record_metadata_not_default(
            self, "predict", sample_weight=sample_weight, metadata=metadata
        )
        return np.zeros(shape=(len(X),))

    def score(self, X, y, sample_weight="default", metadata="default"):
        record_metadata_not_default(
            self, "score", sample_weight=sample_weight, metadata=metadata
        )
        return 1
class NonConsumingClassifier(ClassifierMixin, BaseEstimator):
    """A classifier which accepts no metadata on any method."""

    def __init__(self, alpha=0.0):
        self.alpha = alpha

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        return self

    def partial_fit(self, X, y, classes=None):
        return self

    def decision_function(self, X):
        return self.predict(X)

    def predict(self, X):
        # First half of the samples gets class 0, the second half class 1.
        half = len(X) // 2
        predictions = np.empty(shape=(len(X),))
        predictions[:half] = 0
        predictions[half:] = 1
        return predictions
class NonConsumingRegressor(RegressorMixin, BaseEstimator):
    """A regressor which accepts no metadata on any method."""

    # Fixed: the docstring previously said "A classifier", copied from
    # NonConsumingClassifier above; this class is a regressor.

    def fit(self, X, y):
        return self

    def partial_fit(self, X, y):
        return self

    def predict(self, X):
        return np.ones(len(X))  # pragma: no cover
class ConsumingClassifier(ClassifierMixin, BaseEstimator):
    """A classifier consuming metadata.

    Parameters
    ----------
    registry : list, default=None
        If a list, the estimator will append itself to the list in order to have
        a reference to the estimator later on. Since that reference is not
        required in all tests, registration can be skipped by leaving this value
        as None.

    alpha : float, default=0
        This parameter is only used to test the ``*SearchCV`` objects, and
        doesn't do anything.
    """

    def __init__(self, registry=None, alpha=0.0):
        self.alpha = alpha
        self.registry = registry

    def partial_fit(
        self, X, y, classes=None, sample_weight="default", metadata="default"
    ):
        if self.registry is not None:
            self.registry.append(self)

        record_metadata_not_default(
            self, "partial_fit", sample_weight=sample_weight, metadata=metadata
        )
        _check_partial_fit_first_call(self, classes)
        return self

    def fit(self, X, y, sample_weight="default", metadata="default"):
        if self.registry is not None:
            self.registry.append(self)

        record_metadata_not_default(
            self, "fit", sample_weight=sample_weight, metadata=metadata
        )

        self.classes_ = np.unique(y)
        return self

    def predict(self, X, sample_weight="default", metadata="default"):
        record_metadata_not_default(
            self, "predict", sample_weight=sample_weight, metadata=metadata
        )
        y_score = np.empty(shape=(len(X),), dtype="int8")
        y_score[len(X) // 2 :] = 0
        y_score[: len(X) // 2] = 1
        return y_score

    def predict_proba(self, X, sample_weight="default", metadata="default"):
        record_metadata_not_default(
            self, "predict_proba", sample_weight=sample_weight, metadata=metadata
        )
        y_proba = np.empty(shape=(len(X), 2))
        y_proba[: len(X) // 2, :] = np.asarray([1.0, 0.0])
        y_proba[len(X) // 2 :, :] = np.asarray([0.0, 1.0])
        return y_proba

    def predict_log_proba(self, X, sample_weight="default", metadata="default"):
        pass  # pragma: no cover

        # uncomment when needed
        # record_metadata_not_default(
        #     self, "predict_log_proba", sample_weight=sample_weight, metadata=metadata
        # )
        # return np.zeros(shape=(len(X), 2))

    def decision_function(self, X, sample_weight="default", metadata="default"):
        # Fixed: this previously recorded under the "predict_proba" key
        # (copy/paste error), so checks on metadata routed to
        # ``decision_function`` inspected the wrong method's records.
        record_metadata_not_default(
            self, "decision_function", sample_weight=sample_weight, metadata=metadata
        )
        y_score = np.empty(shape=(len(X),))
        y_score[len(X) // 2 :] = 0
        y_score[: len(X) // 2] = 1
        return y_score

    # uncomment when needed
    # def score(self, X, y, sample_weight="default", metadata="default"):
    #     record_metadata_not_default(
    #         self, "score", sample_weight=sample_weight, metadata=metadata
    #     )
    #     return 1
class ConsumingTransformer(TransformerMixin, BaseEstimator):
    """A transformer which accepts metadata on fit and transform.

    Parameters
    ----------
    registry : list, default=None
        If a list, the estimator will append itself to the list in order to
        have a reference to the estimator later on. Since that reference is
        not required in all tests, registration can be skipped by leaving
        this value as None.
    """

    def __init__(self, registry=None):
        self.registry = registry

    def fit(self, X, y=None, sample_weight=None, metadata=None):
        if self.registry is not None:
            self.registry.append(self)

        record_metadata_not_default(
            self, "fit", sample_weight=sample_weight, metadata=metadata
        )
        return self

    def transform(self, X, sample_weight=None, metadata=None):
        record_metadata(
            self, "transform", sample_weight=sample_weight, metadata=metadata
        )
        # Identity transform: the data itself is irrelevant to these tests.
        return X

    def fit_transform(self, X, y, sample_weight=None, metadata=None):
        # implementing ``fit_transform`` is necessary since
        # ``TransformerMixin.fit_transform`` doesn't route any metadata to
        # ``transform``, while here we want ``transform`` to receive
        # ``sample_weight`` and ``metadata``.
        record_metadata(
            self, "fit_transform", sample_weight=sample_weight, metadata=metadata
        )
        fitted = self.fit(X, y, sample_weight=sample_weight, metadata=metadata)
        return fitted.transform(X, sample_weight=sample_weight, metadata=metadata)

    def inverse_transform(self, X, sample_weight=None, metadata=None):
        record_metadata(
            self, "inverse_transform", sample_weight=sample_weight, metadata=metadata
        )
        return X
class ConsumingNoFitTransformTransformer(BaseEstimator):
    """A metadata consuming transformer that doesn't inherit from
    TransformerMixin, and thus doesn't implement `fit_transform`. Note that
    TransformerMixin's `fit_transform` doesn't route metadata to `transform`."""

    def __init__(self, registry=None):
        self.registry = registry

    def fit(self, X, y=None, sample_weight=None, metadata=None):
        if self.registry is not None:
            self.registry.append(self)

        record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata)
        return self

    def transform(self, X, sample_weight=None, metadata=None):
        record_metadata(
            self, "transform", sample_weight=sample_weight, metadata=metadata
        )
        # Identity transform: only the recorded metadata matters.
        return X
class ConsumingScorer(_Scorer):
    # A metadata-consuming scorer: records whatever metadata it receives and
    # then defers to the standard ``_Scorer`` machinery wrapping
    # ``mean_squared_error`` on ``predict`` responses.

    def __init__(self, registry=None):
        super().__init__(
            score_func=mean_squared_error, sign=1, kwargs={}, response_method="predict"
        )
        self.registry = registry

    def _score(self, method_caller, clf, X, y, **kwargs):
        if self.registry is not None:
            self.registry.append(self)

        record_metadata_not_default(self, "score", **kwargs)

        # Only sample_weight is forwarded to the parent scorer; any other
        # metadata is recorded above but not passed on.
        sample_weight = kwargs.get("sample_weight", None)
        return super()._score(method_caller, clf, X, y, sample_weight=sample_weight)
class ConsumingSplitter(GroupsConsumerMixin, BaseCrossValidator):
    """A two-fold splitter that consumes ``groups`` and ``metadata``."""

    def __init__(self, registry=None):
        self.registry = registry

    def split(self, X, y=None, groups="default", metadata="default"):
        # NOTE: this is a generator, so registration and recording only
        # happen once iteration starts.
        if self.registry is not None:
            self.registry.append(self)

        record_metadata_not_default(self, "split", groups=groups, metadata=metadata)

        middle = len(X) // 2
        first_half = list(range(0, middle))
        second_half = list(range(middle, len(X)))
        # Two folds: each half serves once as the test set.
        yield second_half, first_half
        yield first_half, second_half

    def get_n_splits(self, X=None, y=None, groups=None, metadata=None):
        return 2

    def _iter_test_indices(self, X=None, y=None, groups=None):
        middle = len(X) // 2
        yield list(range(middle, len(X)))
        yield list(range(0, middle))
class MetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator):
    """A meta-regressor which is only a router."""

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y, **fit_params):
        """Route ``fit_params`` and fit a clone of ``estimator``."""
        params = process_routing(self, "fit", **fit_params)
        self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit)
        # Fixed: return self per the scikit-learn convention that ``fit``
        # returns the fitted estimator (previously returned None implicitly).
        return self

    def get_metadata_routing(self):
        router = MetadataRouter(owner=self.__class__.__name__).add(
            estimator=self.estimator,
            method_mapping=MethodMapping().add(caller="fit", callee="fit"),
        )
        return router
class WeightedMetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator):
    """A meta-regressor which is also a consumer."""

    def __init__(self, estimator, registry=None):
        self.estimator = estimator
        self.registry = registry

    def fit(self, X, y, sample_weight=None, **fit_params):
        if self.registry is not None:
            self.registry.append(self)

        # Consume sample_weight ourselves, then route everything onward.
        record_metadata(self, "fit", sample_weight=sample_weight)
        routed = process_routing(self, "fit", sample_weight=sample_weight, **fit_params)
        self.estimator_ = clone(self.estimator).fit(X, y, **routed.estimator.fit)
        return self

    def predict(self, X, **predict_params):
        routed = process_routing(self, "predict", **predict_params)
        return self.estimator_.predict(X, **routed.estimator.predict)

    def get_metadata_routing(self):
        mapping = (
            MethodMapping()
            .add(caller="fit", callee="fit")
            .add(caller="predict", callee="predict")
        )
        router = MetadataRouter(owner=self.__class__.__name__).add_self_request(self)
        return router.add(estimator=self.estimator, method_mapping=mapping)
class WeightedMetaClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
    """A meta-estimator which also consumes sample_weight itself in ``fit``."""

    def __init__(self, estimator, registry=None):
        self.estimator = estimator
        self.registry = registry

    def fit(self, X, y, sample_weight=None, **kwargs):
        if self.registry is not None:
            self.registry.append(self)

        # Consume sample_weight ourselves, then route everything onward.
        record_metadata(self, "fit", sample_weight=sample_weight)
        routed = process_routing(self, "fit", sample_weight=sample_weight, **kwargs)
        self.estimator_ = clone(self.estimator).fit(X, y, **routed.estimator.fit)
        return self

    def get_metadata_routing(self):
        mapping = MethodMapping().add(caller="fit", callee="fit")
        router = MetadataRouter(owner=self.__class__.__name__).add_self_request(self)
        return router.add(estimator=self.estimator, method_mapping=mapping)
class MetaTransformer(MetaEstimatorMixin, TransformerMixin, BaseEstimator):
    """A simple meta-transformer."""

    def __init__(self, transformer):
        self.transformer = transformer

    def fit(self, X, y=None, **fit_params):
        routed = process_routing(self, "fit", **fit_params)
        self.transformer_ = clone(self.transformer).fit(X, y, **routed.transformer.fit)
        return self

    def transform(self, X, y=None, **transform_params):
        routed = process_routing(self, "transform", **transform_params)
        return self.transformer_.transform(X, **routed.transformer.transform)

    def get_metadata_routing(self):
        mapping = (
            MethodMapping()
            .add(caller="fit", callee="fit")
            .add(caller="transform", callee="transform")
        )
        return MetadataRouter(owner=self.__class__.__name__).add(
            transformer=self.transformer, method_mapping=mapping
        )
@@ -0,0 +1,85 @@
"""global_random_seed fixture
The goal of this fixture is to prevent tests that use it to be sensitive
to a specific seed value while still being deterministic by default.
See the documentation for the SKLEARN_TESTS_GLOBAL_RANDOM_SEED
variable for instructions on how to use this fixture.
https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed
"""
from os import environ
from random import Random
import pytest
# Passes the main worker's random seeds to workers
class XDistHooks:
    """Forward the controller process's chosen random seeds to xdist workers."""

    def pytest_configure_node(self, node) -> None:
        # Copy the seeds computed by the main process into the worker's input.
        seeds = node.config.getoption("random_seeds")
        node.workerinput["random_seeds"] = seeds
def pytest_configure(config):
    """Decide the session's random seed(s) and register the fixture plugin.

    Seeds come, in priority order, from the xdist worker input (so all
    workers agree with the controller), then from the
    SKLEARN_TESTS_GLOBAL_RANDOM_SEED environment variable ("any", "all", a
    single int, or an inclusive "start-stop" range), defaulting to [42].
    """
    if config.pluginmanager.hasplugin("xdist"):
        config.pluginmanager.register(XDistHooks())

    RANDOM_SEED_RANGE = list(range(100))  # All seeds in [0, 99] should be valid.
    random_seed_var = environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED")

    if hasattr(config, "workerinput") and "random_seeds" in config.workerinput:
        # Set worker random seed from seed generated from main process
        random_seeds = config.workerinput["random_seeds"]
    elif random_seed_var is None:
        # This is the way.
        random_seeds = [42]
    elif random_seed_var == "any":
        # Pick-up one seed at random in the range of admissible random seeds.
        random_seeds = [Random().choice(RANDOM_SEED_RANGE)]
    elif random_seed_var == "all":
        random_seeds = RANDOM_SEED_RANGE
    else:
        if "-" in random_seed_var:
            # Inclusive range, e.g. "40-42" -> [40, 41, 42].
            start, stop = random_seed_var.split("-")
            random_seeds = list(range(int(start), int(stop) + 1))
        else:
            random_seeds = [int(random_seed_var)]

        if min(random_seeds) < 0 or max(random_seeds) > 99:
            raise ValueError(
                "The value(s) of the environment variable "
                "SKLEARN_TESTS_GLOBAL_RANDOM_SEED must be in the range [0, 99] "
                f"(or 'any' or 'all'), got: {random_seed_var}"
            )

    config.option.random_seeds = random_seeds

    # Defined here so the fixture's params can close over the chosen seeds.
    class GlobalRandomSeedPlugin:
        @pytest.fixture(params=random_seeds)
        def global_random_seed(self, request):
            """Fixture to ask for a random yet controllable random seed.

            All tests that use this fixture accept the contract that they should
            deterministically pass for any seed value from 0 to 99 included.

            See the documentation for the SKLEARN_TESTS_GLOBAL_RANDOM_SEED
            variable for instructions on how to use this fixture.

            https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed
            """
            yield request.param

    config.pluginmanager.register(GlobalRandomSeedPlugin())
def pytest_report_header(config):
    """Explain how to reproduce a run that used a randomly drawn seed."""
    seed_env = environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED")
    if seed_env != "any":
        # Nothing to report: the seed was chosen deterministically.
        return
    chosen = config.option.random_seeds[0]
    return [
        "To reproduce this test run, set the following environment variable:",
        f' SKLEARN_TESTS_GLOBAL_RANDOM_SEED="{chosen}"',
        (
            "See: https://scikit-learn.org/dev/computing/parallelism.html"
            "#sklearn-tests-global-random-seed"
        ),
    ]
@@ -0,0 +1,921 @@
# Author: Gael Varoquaux
# License: BSD 3 clause
import pickle
import re
import warnings
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_allclose
import sklearn
from sklearn import config_context, datasets
from sklearn.base import (
BaseEstimator,
OutlierMixin,
TransformerMixin,
clone,
is_classifier,
)
from sklearn.decomposition import PCA
from sklearn.exceptions import InconsistentVersionWarning
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._mocking import MockDataFrame
from sklearn.utils._set_output import _get_output_config
from sklearn.utils._testing import (
_convert_container,
assert_array_equal,
assert_no_warnings,
ignore_warnings,
)
#############################################################################
# A few test classes
class MyEstimator(BaseEstimator):
    # Minimal estimator with one scalar parameter and one arbitrary-object
    # parameter (``empty`` is used to exercise clone on many value types).
    def __init__(self, l1=0, empty=None):
        self.l1 = l1
        self.empty = empty
class K(BaseEstimator):
    # Trivial two-parameter estimator used to test repr and nested get_params.
    def __init__(self, c=None, d=None):
        self.c = c
        self.d = d
class T(BaseEstimator):
    # Trivial two-parameter estimator; ``a`` typically holds a nested K.
    def __init__(self, a=None, b=None):
        self.a = a
        self.b = b
class NaNTag(BaseEstimator):
    # Declares NaN support via the estimator tag machinery.
    def _more_tags(self):
        return {"allow_nan": True}
class NoNaNTag(BaseEstimator):
    # Explicitly declares no NaN support.
    def _more_tags(self):
        return {"allow_nan": False}
class OverrideTag(NaNTag):
    # Subclass flipping the parent's allow_nan tag back to False.
    def _more_tags(self):
        return {"allow_nan": False}
class DiamondOverwriteTag(NaNTag, NoNaNTag):
    # Diamond inheritance: contributes no tags itself, so tag resolution
    # follows the MRO of its two parents.
    def _more_tags(self):
        return dict()
class InheritDiamondOverwriteTag(DiamondOverwriteTag):
    # Inherits the diamond tag resolution above unchanged.
    pass
class ModifyInitParams(BaseEstimator):
    """Deprecated behavior.
    Equal parameters but with a type cast.
    Doesn't fulfill a is a
    """

    def __init__(self, a=np.array([0])):
        # Stores a copy instead of the parameter itself, which violates the
        # scikit-learn __init__ contract — clone() is expected to reject it.
        self.a = a.copy()
class Buggy(BaseEstimator):
    "A buggy estimator that does not set its parameters right."

    def __init__(self, a=None):
        # Intentionally ignores ``a``: clone() must detect the mismatch.
        self.a = 1
class NoEstimator:
    """Object with fit/predict but no scikit-learn parameter API."""

    def __init__(self):
        pass

    def fit(self, X=None, y=None):
        # No-op; return self to mimic estimator chaining.
        return self

    def predict(self, X=None):
        # Never predicts anything.
        return None
class VargEstimator(BaseEstimator):
    """scikit-learn estimators shouldn't have vargs."""

    def __init__(self, *vargs):
        # *args-only signature makes get_params()/clone() fail by design.
        pass
#############################################################################
# The tests
def test_clone():
    # Tests that clone creates a correct deep copy.
    # We create an estimator, make a copy of its original state
    # (which, in this case, is the current state of the estimator),
    # and check that the obtained copy is a correct deep copy.
    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert duplicate is not original
    assert original.get_params() == duplicate.get_params()

    # Array-valued parameters clone without error too.
    original = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    duplicate = clone(original)
    assert duplicate is not original
def test_clone_2():
    # Tests that clone doesn't copy everything.
    # We first create an estimator, give it an own attribute, and
    # make a copy of its original state. Then we check that the copy doesn't
    # have the specific attribute we manually added to the initial estimator.
    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    original.own_attribute = "test"
    assert not hasattr(clone(original), "own_attribute")
def test_clone_buggy():
    # Check that clone raises an error on buggy estimators.
    buggy = Buggy()
    buggy.a = 2

    # Each broken estimator must make clone raise its specific error.
    for bad, exc in (
        (buggy, RuntimeError),
        (NoEstimator(), TypeError),
        (VargEstimator(), RuntimeError),
        (ModifyInitParams(), RuntimeError),
    ):
        with pytest.raises(exc):
            clone(bad)
def test_clone_empty_array():
    # Regression test for cloning estimators with empty arrays
    original = MyEstimator(empty=np.array([]))
    assert_array_equal(original.empty, clone(original).empty)

    # Same for a sparse matrix with no stored entries.
    original = MyEstimator(empty=sp.csr_matrix(np.array([[0]])))
    assert_array_equal(original.empty.data, clone(original).empty.data)
def test_clone_nan():
    # Regression test for cloning estimators with default parameter as np.nan
    original = MyEstimator(empty=np.nan)
    assert clone(original).empty is original.empty
def test_clone_dict():
    # test that clone creates a clone of a dict
    original = {"a": MyEstimator()}
    assert clone(original)["a"] is not original["a"]
def test_clone_sparse_matrices():
    # Collect every *_matrix class exposed by scipy.sparse.
    sparse_matrix_classes = []
    for name in dir(sp):
        if not name.endswith("_matrix"):
            continue
        candidate = getattr(sp, name)
        if type(candidate) is type:
            sparse_matrix_classes.append(candidate)

    for matrix_class in sparse_matrix_classes:
        original = MyEstimator(empty=matrix_class(np.eye(5)))
        duplicate = clone(original)
        assert original.empty.__class__ is duplicate.empty.__class__
        assert_array_equal(original.empty.toarray(), duplicate.empty.toarray())
def test_clone_estimator_types():
    # Check that clone works for parameters that are types rather than
    # instances
    original = MyEstimator(empty=MyEstimator)
    assert clone(original).empty is original.empty
def test_clone_class_rather_than_instance():
    # Check that clone raises expected error message when
    # cloning class rather than instance
    expected = "You should provide an instance of scikit-learn estimator"
    with pytest.raises(TypeError, match=expected):
        clone(MyEstimator)
def test_repr():
    # Smoke test the repr of the base estimator.
    repr(MyEstimator())
    assert repr(T(K(), K())) == "T(a=K(), b=K())"
    # Very long parameter values get elided to a fixed-length repr.
    assert len(repr(T(a=["long_params"] * 1000))) == 485
def test_str():
    # Smoke test the str of the base estimator
    str(MyEstimator())
def test_get_params():
    # ``a`` holds a nested K instance, so deep params expose "a__d".
    nested = T(K(), K)

    assert "a__d" in nested.get_params(deep=True)
    assert "a__d" not in nested.get_params(deep=False)

    nested.set_params(a__d=2)
    assert nested.a.d == 2

    with pytest.raises(ValueError):
        nested.set_params(a__a=2)
def test_is_classifier():
    svc = SVC()
    # Plain classifier, and classifier wrapped in meta-estimators/pipelines.
    for estimator in (
        svc,
        GridSearchCV(svc, {"C": [0.1, 1]}),
        Pipeline([("svc", svc)]),
        Pipeline([("svc_cv", GridSearchCV(svc, {"C": [0.1, 1]}))]),
    ):
        assert is_classifier(estimator)
def test_set_params():
    # test nested estimator parameter setting
    pipe = Pipeline([("svc", SVC())])

    # Unknown parameters must be rejected, whether they target the
    # sub-estimator or the pipeline itself.
    for bad_param in ("svc__stupid_param", "svm__stupid_param"):
        with pytest.raises(ValueError):
            pipe.set_params(**{bad_param: True})

    # we don't currently catch if the things in pipeline are estimators
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
def test_set_params_passes_all_parameters():
    # Make sure all parameters are passed together to set_params
    # of nested estimator. Regression test for #9944

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {"max_depth": 5, "min_samples_leaf": 2}
    # Both wrappers must forward the two nested parameters to the inner
    # estimator in a single set_params call.
    for est in [
        Pipeline([("estimator", TestDecisionTree())]),
        GridSearchCV(TestDecisionTree(), {}),
    ]:
        est.set_params(estimator__max_depth=5, estimator__min_samples_leaf=2)
def test_set_params_updates_valid_params():
    # Check that set_params tries to set SVC().C, not
    # DecisionTreeClassifier().C
    search = GridSearchCV(DecisionTreeClassifier(), {})
    search.set_params(estimator=SVC(), estimator__C=42.0)
    assert search.estimator.C == 42.0
@pytest.mark.parametrize(
    "tree,dataset",
    [
        (
            DecisionTreeClassifier(max_depth=2, random_state=0),
            datasets.make_classification(random_state=0),
        ),
        (
            DecisionTreeRegressor(max_depth=2, random_state=0),
            datasets.make_regression(random_state=0),
        ),
    ],
)
def test_score_sample_weight(tree, dataset):
    """Weighted and unweighted scores must differ under random weights."""
    rng = np.random.RandomState(0)
    X, y = dataset
    tree.fit(X, y)

    # generate random sample weights
    weights = rng.randint(1, 10, size=len(y))
    unweighted = tree.score(X, y)
    weighted = tree.score(X, y, sample_weight=weights)
    assert unweighted != weighted, (
        "Unweighted and weighted scores are unexpectedly equal"
    )
def test_clone_pandas_dataframe():
    class DummyEstimator(TransformerMixin, BaseEstimator):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------

        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """

        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    frame = MockDataFrame(np.arange(10))
    estimator = DummyEstimator(frame, scalar_param=1)
    duplicate = clone(estimator)

    # the test
    assert (estimator.df == duplicate.df).values.all()
    assert estimator.scalar_param == duplicate.scalar_param
def test_clone_protocol():
    """Checks that clone works with `__sklearn_clone__` protocol."""

    class FrozenEstimator(BaseEstimator):
        # Wraps a fitted estimator: fit is a no-op and clone returns self.
        def __init__(self, fitted_estimator):
            self.fitted_estimator = fitted_estimator

        def __getattr__(self, name):
            # Delegate everything not defined here to the wrapped estimator.
            return getattr(self.fitted_estimator, name)

        def __sklearn_clone__(self):
            return self

        def fit(self, *args, **kwargs):
            return self

        def fit_transform(self, *args, **kwargs):
            # Transform with the already-fitted estimator; never refit.
            return self.fitted_estimator.transform(*args, **kwargs)

    X = np.array([[-1, -1], [-2, -1], [-3, -2]])
    pca = PCA().fit(X)
    components = pca.components_

    frozen_pca = FrozenEstimator(pca)
    assert_allclose(frozen_pca.components_, components)

    # Calling PCA methods such as `get_feature_names_out` still works
    assert_array_equal(frozen_pca.get_feature_names_out(), pca.get_feature_names_out())

    # Fitting on a new data does not alter `components_`
    X_new = np.asarray([[-1, 2], [3, 4], [1, 2]])
    frozen_pca.fit(X_new)
    assert_allclose(frozen_pca.components_, components)

    # `fit_transform` does not alter state
    frozen_pca.fit_transform(X_new)
    assert_allclose(frozen_pca.components_, components)

    # Cloning estimator is a no-op
    clone_frozen_pca = clone(frozen_pca)
    assert clone_frozen_pca is frozen_pca
    assert_allclose(clone_frozen_pca.components_, components)
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    fitted = DecisionTreeClassifier().fit(iris.data, iris.target)
    payload = pickle.dumps(fitted)
    # The sklearn version is embedded in the pickle payload.
    assert b"_sklearn_version" in payload
    restored = assert_no_warnings(pickle.loads, payload)

    # test that we can predict with the restored decision tree classifier
    assert fitted.score(iris.data, iris.target) == restored.score(
        iris.data, iris.target
    )
class TreeBadVersion(DecisionTreeClassifier):
    # Pretends it was pickled by a different sklearn version ("something").
    def __getstate__(self):
        return dict(self.__dict__.items(), _sklearn_version="something")
# Template for the warning raised when unpickling across sklearn versions;
# formatted (and used as a regex match) in the tests below.
pickle_error_message = (
    "Trying to unpickle estimator {estimator} from "
    "version {old_version} when using version "
    "{current_version}. This might "
    "lead to breaking code or invalid results. "
    "Use at your own risk."
)
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(
        estimator="TreeBadVersion",
        old_version="something",
        current_version=sklearn.__version__,
    )
    # Unpickling must warn, and the warning must be the dedicated
    # InconsistentVersionWarning carrying the version details.
    with pytest.warns(UserWarning, match=message) as warning_record:
        pickle.loads(tree_pickle_other)

    message = warning_record.list[0].message
    assert isinstance(message, InconsistentVersionWarning)

    assert message.estimator_name == "TreeBadVersion"
    assert message.original_sklearn_version == "something"
    assert message.current_sklearn_version == sklearn.__version__
class TreeNoVersion(DecisionTreeClassifier):
    # Pickles without any version info, like estimators from pre-0.18.
    def __getstate__(self):
        return self.__dict__
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert b"_sklearn_version" not in tree_pickle_noversion
    message = pickle_error_message.format(
        estimator="TreeNoVersion",
        old_version="pre-0.18",
        current_version=sklearn.__version__,
    )
    # check we got the warning about using pre-0.18 pickle
    with pytest.warns(UserWarning, match=message):
        pickle.loads(tree_pickle_noversion)
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        # Masquerade as a third-party estimator; no version warning expected
        # for classes outside the sklearn namespace.
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup
class DontPickleAttributeMixin:
    """Mixin that excludes ``_attribute_not_pickled`` from pickled state."""

    def __getstate__(self):
        # Copy the instance dict but blank out the excluded attribute.
        state = dict(self.__dict__)
        state["_attribute_not_pickled"] = None
        return state

    def __setstate__(self, state):
        # Flag that unpickling went through this custom hook.
        state["_restored"] = True
        self.__dict__.update(state)
class MultiInheritanceEstimator(DontPickleAttributeMixin, BaseEstimator):
    # The mixin comes first in the MRO, so its __getstate__/__setstate__
    # take precedence over BaseEstimator's.
    def __init__(self, attribute_pickled=5):
        self.attribute_pickled = attribute_pickled
        self._attribute_not_pickled = None
def test_pickling_when_getstate_is_overwritten_by_mixin():
    original = MultiInheritanceEstimator()
    original._attribute_not_pickled = "this attribute should not be pickled"

    restored = pickle.loads(pickle.dumps(original))
    assert restored.attribute_pickled == 5
    assert restored._attribute_not_pickled is None
    # The mixin's __setstate__ marks the object as restored.
    assert restored._restored
def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
    try:
        estimator = MultiInheritanceEstimator()
        text = "this attribute should not be pickled"
        estimator._attribute_not_pickled = text
        old_mod = type(estimator).__module__
        # Pretend the class comes from outside sklearn so that no sklearn
        # version information is added to the state.
        type(estimator).__module__ = "notsklearn"

        serialized = estimator.__getstate__()
        assert serialized == {"_attribute_not_pickled": None, "attribute_pickled": 5}

        serialized["attribute_pickled"] = 4
        estimator.__setstate__(serialized)
        assert estimator.attribute_pickled == 4
        # The mixin's __setstate__ flags the object as restored.
        assert estimator._restored
    finally:
        type(estimator).__module__ = old_mod
class SingleInheritanceEstimator(BaseEstimator):
    # Overrides __getstate__ directly in the child class (no mixin) to drop
    # _attribute_not_pickled from the pickled state.
    def __init__(self, attribute_pickled=5):
        self.attribute_pickled = attribute_pickled
        self._attribute_not_pickled = None

    def __getstate__(self):
        data = self.__dict__.copy()
        data["_attribute_not_pickled"] = None
        return data
@ignore_warnings(category=(UserWarning))
def test_pickling_works_when_getstate_is_overwritten_in_the_child_class():
    """Round-trip pickle with __getstate__ overridden in the child class."""
    estimator = SingleInheritanceEstimator()
    estimator._attribute_not_pickled = "this attribute should not be pickled"

    serialized = pickle.dumps(estimator)
    estimator_restored = pickle.loads(serialized)
    assert estimator_restored.attribute_pickled == 5
    assert estimator_restored._attribute_not_pickled is None
def test_tag_inheritance():
    # test that changing tags by inheritance is not allowed
    nan_tag_est = NaNTag()
    no_nan_tag_est = NoNaNTag()
    assert nan_tag_est._get_tags()["allow_nan"]
    assert not no_nan_tag_est._get_tags()["allow_nan"]

    # a subclass redefining the tag sees its own value
    redefine_tags_est = OverrideTag()
    assert not redefine_tags_est._get_tags()["allow_nan"]

    # diamond inheritance: the overriding class' tag wins, also for
    # classes that merely inherit from it
    diamond_tag_est = DiamondOverwriteTag()
    assert diamond_tag_est._get_tags()["allow_nan"]

    inherit_diamond_tag_est = InheritDiamondOverwriteTag()
    assert inherit_diamond_tag_est._get_tags()["allow_nan"]
def test_raises_on_get_params_non_attribute():
    """get_params raises AttributeError when an __init__ parameter was
    never stored as an instance attribute."""

    class MyEstimator(BaseEstimator):
        def __init__(self, param=5):
            # Deliberately does NOT do `self.param = param`.
            pass

        def fit(self, X, y=None):
            return self

    est = MyEstimator()
    msg = "'MyEstimator' object has no attribute 'param'"

    with pytest.raises(AttributeError, match=msg):
        est.get_params()
def test_repr_mimebundle_():
    # Checks the display configuration flag controls the json output
    tree = DecisionTreeClassifier()
    output = tree._repr_mimebundle_()
    # default display mode includes an HTML representation
    assert "text/plain" in output
    assert "text/html" in output

    with config_context(display="text"):
        # text-only display mode drops the HTML payload
        output = tree._repr_mimebundle_()
        assert "text/plain" in output
        assert "text/html" not in output
def test_repr_html_wraps():
    # Checks the display configuration flag controls the html output
    tree = DecisionTreeClassifier()
    output = tree._repr_html_()
    assert "<style>" in output

    with config_context(display="text"):
        # _repr_html_ is unavailable when display mode is "text"
        msg = "_repr_html_ is only defined when"
        with pytest.raises(AttributeError, match=msg):
            output = tree._repr_html_()
def test_n_features_in_validation():
    """Check that `_check_n_features` validates data when reset=False"""
    est = MyEstimator()
    X_train = [[1, 2, 3], [4, 5, 6]]
    # reset=True records the number of features seen at fit time
    est._check_n_features(X_train, reset=True)
    assert est.n_features_in_ == 3

    msg = "X does not contain any features, but MyEstimator is expecting 3 features"
    with pytest.raises(ValueError, match=msg):
        est._check_n_features("invalid X", reset=False)
def test_n_features_in_no_validation():
    """Check that `_check_n_features` does not validate data when
    n_features_in_ is not defined."""
    est = MyEstimator()
    # Non-array input with reset=True: nothing is recorded.
    est._check_n_features("invalid X", reset=True)

    assert not hasattr(est, "n_features_in_")

    # does not raise
    est._check_n_features("invalid X", reset=False)
def test_feature_names_in():
    """Check that `feature_names_in_` is recorded by `_validate_data`."""
    pd = pytest.importorskip("pandas")
    iris = datasets.load_iris()
    X_np = iris.data
    df = pd.DataFrame(X_np, columns=iris.feature_names)

    class NoOpTransformer(TransformerMixin, BaseEstimator):
        # Minimal transformer that only runs input validation.
        def fit(self, X, y=None):
            self._validate_data(X)
            return self

        def transform(self, X):
            self._validate_data(X, reset=False)
            return X

    # fit on dataframe saves the feature names
    trans = NoOpTransformer().fit(df)
    assert_array_equal(trans.feature_names_in_, df.columns)

    # fit again but on ndarray does not keep the previous feature names (see #21383)
    trans.fit(X_np)
    assert not hasattr(trans, "feature_names_in_")

    trans.fit(df)
    msg = "The feature names should match those that were passed"
    df_bad = pd.DataFrame(X_np, columns=iris.feature_names[::-1])
    with pytest.raises(ValueError, match=msg):
        trans.transform(df_bad)

    # warns when fitted on dataframe and transforming a ndarray
    msg = (
        "X does not have valid feature names, but NoOpTransformer was "
        "fitted with feature names"
    )
    with pytest.warns(UserWarning, match=msg):
        trans.transform(X_np)

    # warns when fitted on a ndarray and transforming dataframe
    msg = "X has feature names, but NoOpTransformer was fitted without feature names"
    trans = NoOpTransformer().fit(X_np)
    with pytest.warns(UserWarning, match=msg):
        trans.transform(df)

    # fit on dataframe with all integer feature names works without warning
    df_int_names = pd.DataFrame(X_np)
    trans = NoOpTransformer()
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        trans.fit(df_int_names)

    # fit on dataframe with no feature names or all integer feature names
    # -> do not warn on transform
    Xs = [X_np, df_int_names]
    for X in Xs:
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            trans.transform(X)

    # fit on dataframe with feature names that are mixed raises an error:
    df_mixed = pd.DataFrame(X_np, columns=["a", "b", 1, 2])
    trans = NoOpTransformer()
    msg = re.escape(
        "Feature names are only supported if all input features have string names, "
        "but your input has ['int', 'str'] as feature name / column name types. "
        "If you want feature names to be stored and validated, you must convert "
        "them all to strings, by using X.columns = X.columns.astype(str) for "
        "example. Otherwise you can remove feature / column names from your input "
        "data, or convert them all to a non-string data type."
    )
    with pytest.raises(TypeError, match=msg):
        trans.fit(df_mixed)

    # transform on feature names that are mixed also raises:
    with pytest.raises(TypeError, match=msg):
        trans.transform(df_mixed)
def test_validate_data_cast_to_ndarray():
    """Check cast_to_ndarray option of _validate_data."""
    pd = pytest.importorskip("pandas")
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    y = pd.Series(iris.target)

    class NoOpTransformer(TransformerMixin, BaseEstimator):
        pass

    no_op = NoOpTransformer()

    # cast_to_ndarray=True converts a DataFrame X to an ndarray
    X_np_out = no_op._validate_data(df, cast_to_ndarray=True)
    assert isinstance(X_np_out, np.ndarray)
    assert_allclose(X_np_out, df.to_numpy())

    # cast_to_ndarray=False returns the very same object, uncopied
    X_df_out = no_op._validate_data(df, cast_to_ndarray=False)
    assert X_df_out is df

    # same behavior for y alone
    y_np_out = no_op._validate_data(y=y, cast_to_ndarray=True)
    assert isinstance(y_np_out, np.ndarray)
    assert_allclose(y_np_out, y.to_numpy())

    y_series_out = no_op._validate_data(y=y, cast_to_ndarray=False)
    assert y_series_out is y

    # and for X and y passed together
    X_np_out, y_np_out = no_op._validate_data(df, y, cast_to_ndarray=True)
    assert isinstance(X_np_out, np.ndarray)
    assert_allclose(X_np_out, df.to_numpy())
    assert isinstance(y_np_out, np.ndarray)
    assert_allclose(y_np_out, y.to_numpy())

    X_df_out, y_series_out = no_op._validate_data(df, y, cast_to_ndarray=False)
    assert X_df_out is df
    assert y_series_out is y

    # calling without X and y is an error
    msg = "Validation should be done on X, y or both."
    with pytest.raises(ValueError, match=msg):
        no_op._validate_data()
def test_clone_keeps_output_config():
    """Check that clone keeps the set_output config."""
    ss = StandardScaler().set_output(transform="pandas")
    config = _get_output_config("transform", ss)

    ss_clone = clone(ss)
    config_clone = _get_output_config("transform", ss_clone)
    # the clone must carry the same transform-output configuration
    assert config == config_clone
class _Empty:
    # Deliberately empty base class used to build an estimator with an
    # empty instance __dict__.
    pass
class EmptyEstimator(_Empty, BaseEstimator):
    # Estimator with no attributes of its own; used for __getstate__ checks.
    pass
@pytest.mark.parametrize("estimator", [BaseEstimator(), EmptyEstimator()])
def test_estimator_empty_instance_dict(estimator):
    """Check that ``__getstate__`` returns an empty ``dict`` with an empty
    instance.
    Python 3.11+ changed behaviour by returning ``None`` instead of raising an
    ``AttributeError``. Non-regression test for gh-25188.
    """
    state = estimator.__getstate__()
    # only the sklearn version stamp is added to an otherwise empty state
    expected = {"_sklearn_version": sklearn.__version__}
    assert state == expected

    # this should not raise
    pickle.loads(pickle.dumps(BaseEstimator()))
def test_estimator_getstate_using_slots_error_message():
    """Using a `BaseEstimator` with `__slots__` is not supported."""

    class WithSlots:
        __slots__ = ("x",)

    class Estimator(BaseEstimator, WithSlots):
        pass

    msg = (
        "You cannot use `__slots__` in objects inheriting from "
        "`sklearn.base.BaseEstimator`"
    )

    # both direct __getstate__ and pickling must fail with the same message
    with pytest.raises(TypeError, match=msg):
        Estimator().__getstate__()

    with pytest.raises(TypeError, match=msg):
        pickle.dumps(Estimator())
@pytest.mark.parametrize(
    "constructor_name, minversion",
    [
        ("dataframe", "1.5.0"),
        ("pyarrow", "12.0.0"),
        ("polars", "0.20.23"),
    ],
)
def test_dataframe_protocol(constructor_name, minversion):
    """Uses the dataframe exchange protocol to get feature names."""
    data = [[1, 4, 2], [3, 3, 6]]
    columns = ["col_0", "col_1", "col_2"]
    df = _convert_container(
        data, constructor_name, columns_name=columns, minversion=minversion
    )

    class NoOpTransformer(TransformerMixin, BaseEstimator):
        # Minimal transformer that only runs input validation.
        def fit(self, X, y=None):
            self._validate_data(X)
            return self

        def transform(self, X):
            return self._validate_data(X, reset=False)

    no_op = NoOpTransformer()
    no_op.fit(df)
    assert_array_equal(no_op.feature_names_in_, columns)
    X_out = no_op.transform(df)

    if constructor_name != "pyarrow":
        # pyarrow does not work with `np.asarray`
        # https://github.com/apache/arrow/issues/34886
        assert_allclose(df, X_out)

    # mismatching column names are rejected at transform time
    bad_names = ["a", "b", "c"]
    df_bad = _convert_container(data, constructor_name, columns_name=bad_names)
    with pytest.raises(ValueError, match="The feature names should match"):
        no_op.transform(df_bad)
@pytest.mark.usefixtures("enable_slep006")
def test_transformer_fit_transform_with_metadata_in_transform():
    """Test that having a transformer with metadata for transform raises a
    warning when calling fit_transform."""

    class CustomTransformer(BaseEstimator, TransformerMixin):
        def fit(self, X, y=None, prop=None):
            return self

        def transform(self, X, prop=None):
            return X

    # passing the metadata to `fit_transform` should raise a warning since it
    # could potentially be consumed by `transform`
    with pytest.warns(UserWarning, match="`transform` method which consumes metadata"):
        CustomTransformer().set_transform_request(prop=True).fit_transform(
            [[1]], [1], prop=1
        )

    # not passing a metadata which can potentially be consumed by `transform` should
    # not raise a warning
    with warnings.catch_warnings(record=True) as record:
        CustomTransformer().set_transform_request(prop=True).fit_transform([[1]], [1])
        assert len(record) == 0
@pytest.mark.usefixtures("enable_slep006")
def test_outlier_mixin_fit_predict_with_metadata_in_predict():
    """Test that having an OutlierMixin with metadata for predict raises a
    warning when calling fit_predict."""

    class CustomOutlierDetector(BaseEstimator, OutlierMixin):
        def fit(self, X, y=None, prop=None):
            return self

        def predict(self, X, prop=None):
            return X

    # passing the metadata to `fit_predict` should raise a warning since it
    # could potentially be consumed by `predict`
    with pytest.warns(UserWarning, match="`predict` method which consumes metadata"):
        CustomOutlierDetector().set_predict_request(prop=True).fit_predict(
            [[1]], [1], prop=1
        )

    # not passing a metadata which can potentially be consumed by `predict` should
    # not raise a warning
    with warnings.catch_warnings(record=True) as record:
        CustomOutlierDetector().set_predict_request(prop=True).fit_predict([[1]], [1])
        assert len(record) == 0
@@ -0,0 +1,34 @@
import os
import textwrap
import pytest
from sklearn import __version__
from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled
def test_openmp_parallelism_enabled():
    # Check that sklearn is built with OpenMP-based parallelism enabled.
    # This test can be skipped by setting the environment variable
    # ``SKLEARN_SKIP_OPENMP_TEST``.
    if os.getenv("SKLEARN_SKIP_OPENMP_TEST"):
        pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)")

    # Point dev builds at the dev docs, releases at the stable docs.
    base_url = "dev" if __version__.endswith(".dev0") else "stable"
    err_msg = textwrap.dedent(
        """
        This test fails because scikit-learn has been built without OpenMP.
        This is not recommended since some estimators will run in sequential
        mode instead of leveraging thread-based parallelism.
        You can find instructions to build scikit-learn with OpenMP at this
        address:
        https://scikit-learn.org/{}/developers/advanced_installation.html
        You can skip this test by setting the environment variable
        SKLEARN_SKIP_OPENMP_TEST to any value.
        """
    ).format(base_url)

    assert _openmp_parallelism_enabled(), err_msg
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
"""
Smoke Test the check_build module
"""
# Author: G Varoquaux
# License: BSD 3 clause
import pytest
from sklearn.__check_build import raise_build_error
def test_raise_build_error():
    """raise_build_error must surface the given exception as an ImportError."""
    with pytest.raises(ImportError):
        raise_build_error(ImportError())
@@ -0,0 +1,626 @@
"""
General tests for all estimators in sklearn.
"""
# Authors: Andreas Mueller <amueller@ais.uni-bonn.de>
# Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD 3 clause
import os
import pkgutil
import re
import sys
import warnings
from functools import partial
from inspect import isgenerator, signature
from itertools import chain, product
from pathlib import Path
import numpy as np
import pytest
import sklearn
from sklearn.base import BaseEstimator
from sklearn.cluster import (
OPTICS,
AffinityPropagation,
Birch,
MeanShift,
SpectralClustering,
)
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA
from sklearn.exceptions import ConvergenceWarning, FitFailedWarning
# make it possible to discover experimental estimators when calling `all_estimators`
from sklearn.experimental import (
enable_halving_search_cv, # noqa
enable_iterative_imputer, # noqa
)
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.linear_model._base import LinearClassifierMixin
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.model_selection import (
GridSearchCV,
HalvingGridSearchCV,
HalvingRandomSearchCV,
RandomizedSearchCV,
)
from sklearn.neighbors import (
KNeighborsClassifier,
KNeighborsRegressor,
LocalOutlierFactor,
RadiusNeighborsClassifier,
RadiusNeighborsRegressor,
)
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import (
FunctionTransformer,
MinMaxScaler,
OneHotEncoder,
StandardScaler,
)
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.utils import all_estimators
from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags
from sklearn.utils._testing import (
SkipTest,
ignore_warnings,
set_random_state,
)
from sklearn.utils.estimator_checks import (
_construct_instance,
_get_check_estimator_ids,
_set_checking_parameters,
check_class_weight_balanced_linear_classifier,
check_dataframe_column_names_consistency,
check_estimator,
check_get_feature_names_out_error,
check_global_output_transform_pandas,
check_global_set_output_transform_polars,
check_n_features_in_after_fitting,
check_param_validation,
check_set_output_transform,
check_set_output_transform_pandas,
check_set_output_transform_polars,
check_transformer_get_feature_names_out,
check_transformer_get_feature_names_out_pandas,
parametrize_with_checks,
)
from sklearn.utils.fixes import _IS_PYPY, _IS_WASM
def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        # Convention: abstract bases are named `Base...` and must be excluded.
        msg = (
            "Base estimators such as {0} should not be included in all_estimators"
        ).format(name)
        assert not name.lower().startswith("base"), msg
def _sample_func(x, y=1):
    # Do-nothing function used as a fixture for _get_check_estimator_ids.
    pass
class CallableEstimator(BaseEstimator):
    """Dummy development stub for an estimator.
    This is to make sure a callable estimator passes common tests.
    """

    def __call__(self):
        # Never invoked by the tests; exists only to make the instance callable.
        pass  # pragma: nocover
@pytest.mark.parametrize(
    "val, expected",
    [
        # partials keep only keyword overrides in their id
        (partial(_sample_func, y=1), "_sample_func(y=1)"),
        (_sample_func, "_sample_func"),
        # positional-only partials show no arguments
        (partial(_sample_func, "world"), "_sample_func"),
        (LogisticRegression(C=2.0), "LogisticRegression(C=2.0)"),
        (
            LogisticRegression(
                random_state=1,
                solver="newton-cg",
                class_weight="balanced",
                warm_start=True,
            ),
            (
                "LogisticRegression(class_weight='balanced',random_state=1,"
                "solver='newton-cg',warm_start=True)"
            ),
        ),
        (CallableEstimator(), "CallableEstimator()"),
    ],
)
def test_get_check_estimator_ids(val, expected):
    """_get_check_estimator_ids builds compact, deterministic pytest ids."""
    assert _get_check_estimator_ids(val) == expected
def _tested_estimators(type_filter=None):
    """Yield one constructed instance per discoverable estimator class.

    Classes that cannot be instantiated in the test environment (signalled
    by ``SkipTest`` from ``_construct_instance``) are silently skipped.
    """
    for _, estimator_cls in all_estimators(type_filter=type_filter):
        try:
            instance = _construct_instance(estimator_cls)
        except SkipTest:
            continue
        yield instance
def _generate_pipeline():
    """Yield two-step scaler+model pipelines (one regressor, one classifier)."""
    final_estimators = [Ridge(), LogisticRegression()]
    for final_estimator in final_estimators:
        steps = [
            ("scaler", StandardScaler()),
            ("final_estimator", final_estimator),
        ]
        yield Pipeline(steps=steps)
@parametrize_with_checks(list(chain(_tested_estimators(), _generate_pipeline())))
def test_estimators(estimator, check, request):
    # Common tests for estimator instances
    with ignore_warnings(category=(FutureWarning, ConvergenceWarning, UserWarning)):
        # shrink iteration counts / dataset-dependent params for speed
        _set_checking_parameters(estimator)
        check(estimator)
def test_check_estimator_generate_only():
    """generate_only=True returns a lazy generator of (estimator, check) pairs."""
    all_instance_gen_checks = check_estimator(LogisticRegression(), generate_only=True)
    assert isgenerator(all_instance_gen_checks)
def test_setup_py_check():
    # Smoke test `python setup.py check` command run at the root of the
    # scikit-learn source tree.
    cwd = os.getcwd()
    setup_path = Path(sklearn.__file__).parent.parent
    setup_filename = os.path.join(setup_path, "setup.py")
    # e.g. wheel installs ship no setup.py; nothing to smoke-test then
    if not os.path.exists(setup_filename):
        pytest.skip("setup.py not available")
    try:
        os.chdir(setup_path)
        old_argv = sys.argv
        sys.argv = ["setup.py", "check"]

        with warnings.catch_warnings():
            # The configuration spits out warnings when not finding
            # Blas/Atlas development headers
            warnings.simplefilter("ignore", UserWarning)
            with open("setup.py") as f:
                exec(f.read(), dict(__name__="__main__"))
    finally:
        # Always restore argv and the working directory for later tests.
        sys.argv = old_argv
        os.chdir(cwd)
def _tested_linear_classifiers():
    """Yield (name, class) pairs of linear classifiers exposing class_weight."""
    classifiers = all_estimators(type_filter="classifier")

    with warnings.catch_warnings(record=True):
        for name, clazz in classifiers:
            required_parameters = getattr(clazz, "_required_parameters", [])
            if len(required_parameters):
                # FIXME
                continue

            if "class_weight" in clazz().get_params().keys() and issubclass(
                clazz, LinearClassifierMixin
            ):
                yield name, clazz
@pytest.mark.parametrize("name, Classifier", _tested_linear_classifiers())
def test_class_weight_balanced_linear_classifiers(name, Classifier):
    # Delegates to the shared estimator check for class_weight="balanced".
    check_class_weight_balanced_linear_classifier(name, Classifier)
@pytest.mark.xfail(_IS_WASM, reason="importlib not supported for Pyodide packages")
@ignore_warnings
def test_import_all_consistency():
    sklearn_path = [os.path.dirname(sklearn.__file__)]
    # Smoke test to check that any name in a __all__ list is actually defined
    # in the namespace of the module or package.
    pkgs = pkgutil.walk_packages(
        path=sklearn_path, prefix="sklearn.", onerror=lambda _: None
    )
    submods = [modname for _, modname, _ in pkgs]
    for modname in submods + ["sklearn"]:
        if ".tests." in modname:
            continue
        # Avoid test suite depending on setuptools
        if "sklearn._build_utils" in modname:
            continue
        # modules unavailable on PyPy
        if _IS_PYPY and (
            "_svmlight_format_io" in modname
            or "feature_extraction._hashing_fast" in modname
        ):
            continue
        package = __import__(modname, fromlist="dummy")
        for name in getattr(package, "__all__", ()):
            assert hasattr(package, name), "Module '{0}' has no attribute '{1}'".format(
                modname, name
            )
def test_root_import_all_completeness():
    """Every public top-level sklearn subpackage must be listed in sklearn.__all__."""
    sklearn_path = [os.path.dirname(sklearn.__file__)]
    EXCEPTIONS = ("utils", "tests", "base", "setup", "conftest")
    for _, modname, _ in pkgutil.walk_packages(
        path=sklearn_path, onerror=lambda _: None
    ):
        # skip nested modules, private modules, and the known exceptions
        if "." in modname or modname.startswith("_") or modname in EXCEPTIONS:
            continue
        assert modname in sklearn.__all__
@pytest.mark.skipif(
    sklearn._BUILT_WITH_MESON,
    reason=(
        "This test fails with Meson editable installs see"
        " https://github.com/mesonbuild/meson-python/issues/557 for more details"
    ),
)
def test_all_tests_are_importable():
    # Ensure that for each contentful subpackage, there is a test directory
    # within it that is also a subpackage (i.e. a directory with __init__.py)

    # subpackages exempt from needing a `tests` child
    HAS_TESTS_EXCEPTIONS = re.compile(
        r"""(?x)
        \.externals(\.|$)|
        \.tests(\.|$)|
        \._
        """
    )
    # data-only packages (no code to test)
    resource_modules = {
        "sklearn.datasets.data",
        "sklearn.datasets.descr",
        "sklearn.datasets.images",
    }
    sklearn_path = [os.path.dirname(sklearn.__file__)]
    lookup = {
        name: ispkg
        for _, name, ispkg in pkgutil.walk_packages(sklearn_path, prefix="sklearn.")
    }
    missing_tests = [
        name
        for name, ispkg in lookup.items()
        if ispkg
        and name not in resource_modules
        and not HAS_TESTS_EXCEPTIONS.search(name)
        and name + ".tests" not in lookup
    ]
    assert missing_tests == [], (
        "{0} do not have `tests` subpackages. "
        "Perhaps they require "
        "__init__.py or an add_subpackage directive "
        "in the parent "
        "setup.py".format(missing_tests)
    )
def test_class_support_removed():
    # Make sure passing classes to check_estimator or parametrize_with_checks
    # raises an error
    msg = "Passing a class was deprecated.* isn't supported anymore"
    with pytest.raises(TypeError, match=msg):
        check_estimator(LogisticRegression)

    with pytest.raises(TypeError, match=msg):
        parametrize_with_checks([LogisticRegression])
def _generate_column_transformer_instances():
    """Yield a single minimal ColumnTransformer for the common checks."""
    transformers = [("trans1", StandardScaler(), [0, 1])]
    yield ColumnTransformer(transformers=transformers)
def _generate_search_cv_instances():
    """Yield *SearchCV instances combining every search strategy with a
    regressor and a classifier, both bare and wrapped in a pipeline.

    Successive-halving searches additionally get
    ``min_resources="smallest"`` so the checks run on tiny datasets.
    """
    # bare estimators
    for SearchCV, (Estimator, param_grid) in product(
        [
            GridSearchCV,
            HalvingGridSearchCV,
            RandomizedSearchCV,
            # BUGFIX: was HalvingGridSearchCV listed twice, leaving
            # HalvingRandomSearchCV uncovered here (the pipeline loop
            # below already covers all four variants).
            HalvingRandomSearchCV,
        ],
        [
            (Ridge, {"alpha": [0.1, 1.0]}),
            (LogisticRegression, {"C": [0.1, 1.0]}),
        ],
    ):
        init_params = signature(SearchCV).parameters
        extra_params = (
            {"min_resources": "smallest"} if "min_resources" in init_params else {}
        )
        search_cv = SearchCV(Estimator(), param_grid, cv=2, **extra_params)
        set_random_state(search_cv)
        yield search_cv

    # pipeline-wrapped estimators (note the step-prefixed param names)
    for SearchCV, (Estimator, param_grid) in product(
        [
            GridSearchCV,
            HalvingGridSearchCV,
            RandomizedSearchCV,
            HalvingRandomSearchCV,
        ],
        [
            (Ridge, {"ridge__alpha": [0.1, 1.0]}),
            (LogisticRegression, {"logisticregression__C": [0.1, 1.0]}),
        ],
    ):
        init_params = signature(SearchCV).parameters
        extra_params = (
            {"min_resources": "smallest"} if "min_resources" in init_params else {}
        )
        search_cv = SearchCV(
            make_pipeline(PCA(), Estimator()), param_grid, cv=2, **extra_params
        ).set_params(error_score="raise")
        set_random_state(search_cv)
        yield search_cv
@parametrize_with_checks(list(_generate_search_cv_instances()))
def test_search_cv(estimator, check, request):
    # Common tests for SearchCV instances
    # We have a separate test because those meta-estimators can accept a
    # wide range of base estimators (classifiers, regressors, pipelines)
    with ignore_warnings(
        category=(
            FutureWarning,
            ConvergenceWarning,
            UserWarning,
            FitFailedWarning,
        )
    ):
        check(estimator)
@pytest.mark.parametrize(
    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
)
def test_valid_tag_types(estimator):
    """Check that estimator tags are valid."""
    tags = _safe_tags(estimator)
    for name, tag in tags.items():
        # each tag must have the same type as its default value
        correct_tags = type(_DEFAULT_TAGS[name])
        if name == "_xfail_checks":
            # _xfail_checks can be a dictionary
            correct_tags = (correct_tags, dict)
        assert isinstance(tag, correct_tags)
@pytest.mark.parametrize(
    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
)
def test_check_n_features_in_after_fitting(estimator):
    # Delegates to the shared n_features_in_ consistency check.
    _set_checking_parameters(estimator)
    check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)
def _estimators_that_predict_in_fit():
    """Yield estimators configured to evaluate held-out data during fit
    (OOB scoring or early stopping)."""
    for estimator in _tested_estimators():
        est_params = set(estimator.get_params())
        if "oob_score" in est_params:
            yield estimator.set_params(oob_score=True, bootstrap=True)
        elif "early_stopping" in est_params:
            est = estimator.set_params(early_stopping=True, n_iter_no_change=1)
            if est.__class__.__name__ in {"MLPClassifier", "MLPRegressor"}:
                # TODO: FIX MLP to not check validation set during MLP
                yield pytest.param(
                    est, marks=pytest.mark.xfail(msg="MLP still validates in fit")
                )
            else:
                yield est
        elif "n_iter_no_change" in est_params:
            yield estimator.set_params(n_iter_no_change=1)
# NOTE: When running `check_dataframe_column_names_consistency` on a meta-estimator that
# delegates validation to a base estimator, the check is testing that the base estimator
# is checking for column name consistency.

# All estimator instances exercised by test_pandas_column_name_consistency:
# plain estimators, a pipeline, search meta-estimators, and estimators that
# predict on held-out data during fit.
column_name_estimators = list(
    chain(
        _tested_estimators(),
        [make_pipeline(LogisticRegression(C=1))],
        list(_generate_search_cv_instances()),
        _estimators_that_predict_in_fit(),
    )
)
@pytest.mark.parametrize(
    "estimator", column_name_estimators, ids=_get_check_estimator_ids
)
def test_pandas_column_name_consistency(estimator):
    """Run the column-name consistency check and forbid spurious
    "fitted without feature names" warnings."""
    _set_checking_parameters(estimator)
    with ignore_warnings(category=(FutureWarning)):
        with warnings.catch_warnings(record=True) as record:
            check_dataframe_column_names_consistency(
                estimator.__class__.__name__, estimator
            )
        for warning in record:
            assert "was fitted without feature names" not in str(warning.message)
# TODO: As more modules support get_feature_names_out they should be removed
# from this list to be tested
GET_FEATURES_OUT_MODULES_TO_IGNORE = [
    "ensemble",
    "kernel_approximation",
]
def _include_in_get_feature_names_out_check(transformer):
if hasattr(transformer, "get_feature_names_out"):
return True
module = transformer.__module__.split(".")[1]
return module not in GET_FEATURES_OUT_MODULES_TO_IGNORE
# Transformers subject to the get_feature_names_out checks below.
GET_FEATURES_OUT_ESTIMATORS = [
    est
    for est in _tested_estimators("transformer")
    if _include_in_get_feature_names_out_check(est)
]
@pytest.mark.parametrize(
    "transformer", GET_FEATURES_OUT_ESTIMATORS, ids=_get_check_estimator_ids
)
def test_transformers_get_feature_names_out(transformer):
    """Run the get_feature_names_out checks (ndarray and pandas input)."""
    _set_checking_parameters(transformer)

    with ignore_warnings(category=(FutureWarning)):
        check_transformer_get_feature_names_out(
            transformer.__class__.__name__, transformer
        )
        check_transformer_get_feature_names_out_pandas(
            transformer.__class__.__name__, transformer
        )
# Every tested estimator that exposes get_feature_names_out at all.
ESTIMATORS_WITH_GET_FEATURE_NAMES_OUT = [
    est for est in _tested_estimators() if hasattr(est, "get_feature_names_out")
]
@pytest.mark.parametrize(
    "estimator", ESTIMATORS_WITH_GET_FEATURE_NAMES_OUT, ids=_get_check_estimator_ids
)
def test_estimators_get_feature_names_out_error(estimator):
    # Delegates to the shared error-behavior check for get_feature_names_out.
    estimator_name = estimator.__class__.__name__
    _set_checking_parameters(estimator)
    check_get_feature_names_out_error(estimator_name, estimator)
@pytest.mark.parametrize(
    "Estimator",
    [est for name, est in all_estimators()],
)
def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator):
    """Check that init or set_param does not raise errors."""
    params = signature(Estimator).parameters

    # Intentionally bogus values of many different types: validation must be
    # deferred to fit, so construction and set_params must accept them.
    smoke_test_values = [-1, 3.0, "helloworld", np.array([1.0, 4.0]), [1], {}, []]
    for value in smoke_test_values:
        new_params = {key: value for key in params}

        # Does not raise
        est = Estimator(**new_params)

        # set_params does not raise either
        est.set_params(**new_params)
@pytest.mark.parametrize(
    "estimator",
    chain(
        _tested_estimators(),
        _generate_pipeline(),
        _generate_column_transformer_instances(),
        _generate_search_cv_instances(),
    ),
    ids=_get_check_estimator_ids,
)
def test_check_param_validation(estimator):
    # Delegates to the shared parameter-validation check.
    name = estimator.__class__.__name__
    _set_checking_parameters(estimator)
    check_param_validation(name, estimator)
@pytest.mark.parametrize(
    "Estimator",
    [
        AffinityPropagation,
        Birch,
        MeanShift,
        KNeighborsClassifier,
        KNeighborsRegressor,
        RadiusNeighborsClassifier,
        RadiusNeighborsRegressor,
        LabelPropagation,
        LabelSpreading,
        OPTICS,
        SpectralClustering,
        LocalOutlierFactor,
        LocallyLinearEmbedding,
        Isomap,
        TSNE,
    ],
)
def test_f_contiguous_array_estimator(Estimator):
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/23988
    # https://github.com/scikit-learn/scikit-learn/issues/24013
    X, _ = make_blobs(n_samples=80, n_features=4, random_state=0)
    # Force Fortran (column-major) memory layout.
    X = np.asfortranarray(X)
    y = np.round(X[:, 0])

    est = Estimator()
    est.fit(X, y)

    # fit/transform/predict must all accept F-contiguous input
    if hasattr(est, "transform"):
        est.transform(X)

    if hasattr(est, "predict"):
        est.predict(X)
# Transformers exercised by the set_output checks, plus a few hand-picked
# composites and configured instances.
SET_OUTPUT_ESTIMATORS = list(
    chain(
        _tested_estimators("transformer"),
        [
            make_pipeline(StandardScaler(), MinMaxScaler()),
            OneHotEncoder(sparse_output=False),
            FunctionTransformer(feature_names_out="one-to-one"),
        ],
    )
)
@pytest.mark.parametrize(
    "estimator", SET_OUTPUT_ESTIMATORS, ids=_get_check_estimator_ids
)
def test_set_output_transform(estimator):
    """Run the default set_output check on transformers supporting the API."""
    name = estimator.__class__.__name__
    if not hasattr(estimator, "set_output"):
        pytest.skip(
            f"Skipping check_set_output_transform for {name}: Does not support"
            " set_output API"
        )
    _set_checking_parameters(estimator)
    with ignore_warnings(category=(FutureWarning)):
        check_set_output_transform(estimator.__class__.__name__, estimator)
@pytest.mark.parametrize(
    "estimator", SET_OUTPUT_ESTIMATORS, ids=_get_check_estimator_ids
)
@pytest.mark.parametrize(
    "check_func",
    [
        # per-estimator and global-config variants, pandas and polars
        check_set_output_transform_pandas,
        check_global_output_transform_pandas,
        check_set_output_transform_polars,
        check_global_set_output_transform_polars,
    ],
)
def test_set_output_transform_configured(estimator, check_func):
    """Run the configured-output set_output checks on supporting transformers."""
    name = estimator.__class__.__name__
    if not hasattr(estimator, "set_output"):
        pytest.skip(
            f"Skipping {check_func.__name__} for {name}: Does not support"
            " set_output API yet"
        )
    _set_checking_parameters(estimator)
    with ignore_warnings(category=(FutureWarning)):
        check_func(estimator.__class__.__name__, estimator)
@@ -0,0 +1,199 @@
import builtins
import time
from concurrent.futures import ThreadPoolExecutor
import pytest
import sklearn
from sklearn import config_context, get_config, set_config
from sklearn.utils.fixes import _IS_WASM
from sklearn.utils.parallel import Parallel, delayed
def test_config_context():
    """Check config_context scoping, nesting, and interaction with set_config."""
    # default configuration
    assert get_config() == {
        "assume_finite": False,
        "working_memory": 1024,
        "print_changed_only": True,
        "display": "diagram",
        "array_api_dispatch": False,
        "pairwise_dist_chunk_size": 256,
        "enable_cython_pairwise_dist": True,
        "transform_output": "default",
        "enable_metadata_routing": False,
        "skip_parameter_validation": False,
    }

    # Not using as a context manager affects nothing
    config_context(assume_finite=True)
    assert get_config()["assume_finite"] is False

    with config_context(assume_finite=True):
        assert get_config() == {
            "assume_finite": True,
            "working_memory": 1024,
            "print_changed_only": True,
            "display": "diagram",
            "array_api_dispatch": False,
            "pairwise_dist_chunk_size": 256,
            "enable_cython_pairwise_dist": True,
            "transform_output": "default",
            "enable_metadata_routing": False,
            "skip_parameter_validation": False,
        }
    assert get_config()["assume_finite"] is False

    # passing None keeps the value from the enclosing scope
    with config_context(assume_finite=True):
        with config_context(assume_finite=None):
            assert get_config()["assume_finite"] is True

        assert get_config()["assume_finite"] is True

        with config_context(assume_finite=False):
            assert get_config()["assume_finite"] is False

            with config_context(assume_finite=None):
                assert get_config()["assume_finite"] is False

                # global setting will not be retained outside of context that
                # did not modify this setting
                set_config(assume_finite=True)
                assert get_config()["assume_finite"] is True

            assert get_config()["assume_finite"] is False

        assert get_config()["assume_finite"] is True

    # leaving the outermost context restores all defaults
    assert get_config() == {
        "assume_finite": False,
        "working_memory": 1024,
        "print_changed_only": True,
        "display": "diagram",
        "array_api_dispatch": False,
        "pairwise_dist_chunk_size": 256,
        "enable_cython_pairwise_dist": True,
        "transform_output": "default",
        "enable_metadata_routing": False,
        "skip_parameter_validation": False,
    }

    # No positional arguments
    with pytest.raises(TypeError):
        config_context(True)

    # No unknown arguments
    with pytest.raises(TypeError):
        config_context(do_something_else=True).__enter__()
def test_config_context_exception():
    """The config is restored even when the context body raises."""
    assert get_config()["assume_finite"] is False
    try:
        with config_context(assume_finite=True):
            assert get_config()["assume_finite"] is True
            raise ValueError()
    except ValueError:
        pass
    # the escaped exception must not leak the modified config
    assert get_config()["assume_finite"] is False
def test_set_config():
    """set_config changes the global value; None is a no-op."""
    assert get_config()["assume_finite"] is False
    set_config(assume_finite=None)
    assert get_config()["assume_finite"] is False
    set_config(assume_finite=True)
    assert get_config()["assume_finite"] is True
    # None keeps the current (non-default) value
    set_config(assume_finite=None)
    assert get_config()["assume_finite"] is True
    set_config(assume_finite=False)
    assert get_config()["assume_finite"] is False

    # No unknown arguments
    with pytest.raises(TypeError):
        set_config(do_something_else=True)
def set_assume_finite(assume_finite, sleep_duration):
    """Set ``assume_finite`` in a context, sleep, then report its value."""
    with config_context(assume_finite=assume_finite):
        time.sleep(sleep_duration)
        observed = get_config()["assume_finite"]
    return observed
@pytest.mark.parametrize("backend", ["loky", "multiprocessing", "threading"])
def test_config_threadsafe_joblib(backend):
    """Test that the global config is threadsafe with all joblib backends.
    Two jobs are spawned and sets assume_finite to two different values.
    When the job with a duration 0.1s completes, the assume_finite value
    should be the same as the value passed to the function. In other words,
    it is not influenced by the other job setting assume_finite to True.
    """
    jobs = [(False, 0.1), (True, 0.2), (False, 0.1), (True, 0.2)]
    results = Parallel(backend=backend, n_jobs=2)(
        delayed(set_assume_finite)(finite, duration) for finite, duration in jobs
    )
    assert results == [finite for finite, _ in jobs]
@pytest.mark.xfail(_IS_WASM, reason="cannot start threads")
def test_config_threadsafe():
    """Uses threads directly to test that the global config does not change
    between threads. Same test as `test_config_threadsafe_joblib` but with
    `ThreadPoolExecutor`."""
    finites = [False, True, False, True]
    durations = [0.1, 0.2, 0.1, 0.2]
    with ThreadPoolExecutor(max_workers=2) as executor:
        results = list(executor.map(set_assume_finite, finites, durations))
    assert results == finites
def test_config_array_api_dispatch_error(monkeypatch):
    """Check error is raised when array_api_compat is not installed."""
    # Make the array_api_compat import fail as if it were missing.
    real_import = builtins.__import__

    def hide_array_api_compat(name, *args, **kwargs):
        if name == "array_api_compat":
            raise ImportError
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(builtins, "__import__", hide_array_api_compat)

    expected = "array_api_compat is required"
    with pytest.raises(ImportError, match=expected):
        with config_context(array_api_dispatch=True):
            pass
    with pytest.raises(ImportError, match=expected):
        set_config(array_api_dispatch=True)
def test_config_array_api_dispatch_error_numpy(monkeypatch):
    """Check error when NumPy is too old"""
    # Pretend array_api_compat is importable while NumPy reports an old version.
    real_import = builtins.__import__

    def fake_array_api_compat(name, *args, **kwargs):
        if name == "array_api_compat":
            return object()
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(builtins, "__import__", fake_array_api_compat)
    monkeypatch.setattr(sklearn.utils._array_api.numpy, "__version__", "1.20")

    expected = "NumPy must be 1.21 or newer"
    with pytest.raises(ImportError, match=expected):
        with config_context(array_api_dispatch=True):
            pass
    with pytest.raises(ImportError, match=expected):
        set_config(array_api_dispatch=True)
@@ -0,0 +1,678 @@
import numpy as np
import pytest
from scipy import linalg
from sklearn.cluster import KMeans
from sklearn.covariance import LedoitWolf, ShrunkCovariance, ledoit_wolf
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import (
LinearDiscriminantAnalysis,
QuadraticDiscriminantAnalysis,
_cov,
)
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import _IS_WASM
# Data is just 6 separable points in the plane
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype="f")
y = np.array([1, 1, 1, 2, 2, 2])
# Alternative 3-class labelling of the same 6 points (not linearly separable)
y3 = np.array([1, 1, 2, 2, 3, 3])
# Degenerate data with only one feature (still should be separable)
X1 = np.array(
    [[-2], [-1], [-1], [1], [1], [2]],
    dtype="f",
)
# Data is just 9 separable points in the plane
X6 = np.array(
    [[0, 0], [-2, -2], [-2, -1], [-1, -1], [-1, -2], [1, 3], [1, 2], [2, 1], [2, 2]]
)
y6 = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2])
# Shuffled 3-class labelling of the same 9 points (not separable)
y7 = np.array([1, 2, 3, 2, 3, 1, 2, 3, 1])
# Degenerate data with 1 feature (still should be separable)
X7 = np.array([[-3], [-2], [-1], [-1], [0], [1], [1], [2], [3]])
# Data that has zero variance in one dimension and needs regularization
X2 = np.array(
    [[-3, 0], [-2, 0], [-1, 0], [-1, 0], [0, 0], [1, 0], [1, 0], [2, 0], [3, 0]]
)
# One element class
y4 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 2])
# Data with less samples in a class than n_features
X5 = np.c_[np.arange(8), np.zeros((8, 3))]
y5 = np.array([0, 0, 0, 0, 0, 1, 1, 1])
# (solver, shrinkage) combinations exercised by several LDA tests below
solver_shrinkage = [
    ("svd", None),
    ("lsqr", None),
    ("eigen", None),
    ("lsqr", "auto"),
    ("lsqr", 0),
    ("lsqr", 0.43),
    ("eigen", "auto"),
    ("eigen", 0),
    ("eigen", 0.43),
]
def test_lda_predict():
    """LDA fit/predict on toy data for every (solver, shrinkage) combination.

    Also checks probability estimates, 1-D input, prior reuse, and the
    parameter-validation errors raised at fit time.
    """
    # Test LDA classification.
    # This checks that LDA implements fit and predict and returns correct
    # values for simple toy data.
    for test_case in solver_shrinkage:
        solver, shrinkage = test_case
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        y_pred = clf.fit(X, y).predict(X)
        assert_array_equal(y_pred, y, "solver %s" % solver)
        # Assert that it works with 1D data
        y_pred1 = clf.fit(X1, y).predict(X1)
        assert_array_equal(y_pred1, y, "solver %s" % solver)
        # Test probability estimates
        y_proba_pred1 = clf.predict_proba(X1)
        assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y, "solver %s" % solver)
        y_log_proba_pred1 = clf.predict_log_proba(X1)
        # log-probabilities must agree with exponentiated probabilities
        assert_allclose(
            np.exp(y_log_proba_pred1),
            y_proba_pred1,
            rtol=1e-6,
            atol=1e-6,
            err_msg="solver %s" % solver,
        )
        # Primarily test for commit 2f34950 -- "reuse" of priors
        y_pred3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert np.any(y_pred3 != y3), "solver %s" % solver
    # shrinkage is not supported by the 'svd' solver
    clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
    with pytest.raises(NotImplementedError):
        clf.fit(X, y)
    # shrinkage and covariance_estimator are mutually exclusive
    clf = LinearDiscriminantAnalysis(
        solver="lsqr", shrinkage=0.1, covariance_estimator=ShrunkCovariance()
    )
    with pytest.raises(
        ValueError,
        match=(
            "covariance_estimator and shrinkage "
            "parameters are not None. "
            "Only one of the two can be set."
        ),
    ):
        clf.fit(X, y)
    # test bad solver with covariance_estimator
    clf = LinearDiscriminantAnalysis(solver="svd", covariance_estimator=LedoitWolf())
    with pytest.raises(
        ValueError, match="covariance estimator is not supported with svd"
    ):
        clf.fit(X, y)
    # test bad covariance estimator
    clf = LinearDiscriminantAnalysis(
        solver="lsqr", covariance_estimator=KMeans(n_clusters=2, n_init="auto")
    )
    with pytest.raises(ValueError):
        clf.fit(X, y)
@pytest.mark.parametrize("n_classes", [2, 3])
@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
def test_lda_predict_proba(solver, n_classes):
    """Compare LDA posterior probabilities against the closed-form reference
    from The Elements of Statistical Learning (Sect. 4.4.5)."""

    def generate_dataset(n_samples, centers, covariances, random_state=None):
        """Generate a multivariate normal data given some centers and
        covariances"""
        rng = check_random_state(random_state)
        X = np.vstack(
            [
                rng.multivariate_normal(mean, cov, size=n_samples // len(centers))
                for mean, cov in zip(centers, covariances)
            ]
        )
        y = np.hstack(
            [[clazz] * (n_samples // len(centers)) for clazz in range(len(centers))]
        )
        return X, y

    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
    X, y = generate_dataset(
        n_samples=90000, centers=blob_centers, covariances=blob_stds, random_state=42
    )
    lda = LinearDiscriminantAnalysis(
        solver=solver, store_covariance=True, shrinkage=None
    ).fit(X, y)
    # check that the empirical means and covariances are close enough to the
    # one used to generate the data
    assert_allclose(lda.means_, blob_centers, atol=1e-1)
    assert_allclose(lda.covariance_, blob_stds[0], atol=1)
    # implement the method to compute the probability given in The Elements
    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
    # or LDA?")
    precision = linalg.inv(blob_stds[0])
    alpha_k = []
    alpha_k_0 = []
    for clazz in range(len(blob_centers) - 1):
        alpha_k.append(
            np.dot(precision, (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis])
        )
        alpha_k_0.append(
            np.dot(
                -0.5 * (blob_centers[clazz] + blob_centers[-1])[np.newaxis, :],
                alpha_k[-1],
            )
        )
    # a single query point for which the probabilities are compared
    sample = np.array([[-22, 22]])

    def discriminant_func(sample, coef, intercept, clazz):
        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz])).item()

    prob = np.array(
        [
            float(
                discriminant_func(sample, alpha_k, alpha_k_0, clazz)
                / (
                    1
                    + sum(
                        [
                            discriminant_func(sample, alpha_k, alpha_k_0, clazz)
                            for clazz in range(n_classes - 1)
                        ]
                    )
                )
            )
            for clazz in range(n_classes - 1)
        ]
    )
    prob_ref = 1 - np.sum(prob)
    # check the consistency of the computed probability
    # all probabilities should sum to one
    prob_ref_2 = float(
        1
        / (
            1
            + sum(
                [
                    discriminant_func(sample, alpha_k, alpha_k_0, clazz)
                    for clazz in range(n_classes - 1)
                ]
            )
        )
    )
    assert prob_ref == pytest.approx(prob_ref_2)
    # check that the probability of LDA are close to the theoretical
    # probabilities
    assert_allclose(
        lda.predict_proba(sample), np.hstack([prob, prob_ref])[np.newaxis], atol=1e-2
    )
def test_lda_priors():
    """Check validation and normalization of the `priors` parameter."""
    # Negative priors are rejected at fit time.
    clf = LinearDiscriminantAnalysis(priors=np.array([0.5, -0.5]))
    with pytest.raises(ValueError, match="priors must be non-negative"):
        clf.fit(X, y)
    # Priors given as a plain list are accepted (smoke test).
    LinearDiscriminantAnalysis(priors=[0.5, 0.5]).fit(X, y)
    # Priors that do not sum to one are normalized, with a warning.
    clf = LinearDiscriminantAnalysis(priors=np.array([0.5, 0.6]))
    with pytest.warns(UserWarning):
        clf.fit(X, y)
    assert_array_almost_equal(clf.priors_, np.array([0.45, 0.55]), 2)
def test_lda_coefs():
    """All three solvers should find approximately the same coefficients."""
    features, labels = make_blobs(
        n_samples=1000, n_features=2, centers=2, random_state=11
    )
    coefs = {}
    for solver in ("svd", "lsqr", "eigen"):
        model = LinearDiscriminantAnalysis(solver=solver).fit(features, labels)
        coefs[solver] = model.coef_
    assert_array_almost_equal(coefs["svd"], coefs["lsqr"], 1)
    assert_array_almost_equal(coefs["svd"], coefs["eigen"], 1)
    assert_array_almost_equal(coefs["eigen"], coefs["lsqr"], 1)
def test_lda_transform():
    """transform works for 'svd' and 'eigen' solvers; 'lsqr' raises."""
    for solver in ("svd", "eigen"):
        clf = LinearDiscriminantAnalysis(solver=solver, n_components=1)
        assert clf.fit(X, y).transform(X).shape[1] == 1
    clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1)
    clf.fit(X, y)
    msg = "transform not implemented for 'lsqr'"
    with pytest.raises(NotImplementedError, match=msg):
        clf.transform(X)
def test_lda_explained_variance_ratio():
    """explained_variance_ratio_ sums to 1 and agrees between the 'eigen'
    and 'svd' solvers."""
    rng = np.random.RandomState(0)
    features = rng.normal(loc=0, scale=100, size=(40, 20))
    labels = rng.randint(0, 3, size=(40,))
    ratios = {}
    for solver in ("eigen", "svd"):
        model = LinearDiscriminantAnalysis(solver=solver).fit(features, labels)
        ratio = model.explained_variance_ratio_
        assert_almost_equal(ratio.sum(), 1.0, 3)
        assert ratio.shape == (2,), "Unexpected length for explained_variance_ratio_"
        ratios[solver] = ratio
    assert_array_almost_equal(ratios["svd"], ratios["eigen"])
def test_lda_orthogonality():
    """The 'svd' solver should produce whitened, orthogonal discriminant axes."""
    # arrange four classes with their means in a kite-shaped pattern
    # the longer distance should be transformed to the first component, and
    # the shorter distance to the second component.
    means = np.array([[0, 0, -1], [0, 2, 0], [0, -2, 0], [0, 0, 5]])
    # We construct perfectly symmetric distributions, so the LDA can estimate
    # precise means.
    scatter = np.array(
        [
            [0.1, 0, 0],
            [-0.1, 0, 0],
            [0, 0.1, 0],
            [0, -0.1, 0],
            [0, 0, 0.1],
            [0, 0, -0.1],
        ]
    )
    # every class mean plus every scatter offset -> 24 samples, 4 classes
    X = (means[:, np.newaxis, :] + scatter[np.newaxis, :, :]).reshape((-1, 3))
    y = np.repeat(np.arange(means.shape[0]), scatter.shape[0])
    # Fit LDA and transform the means
    clf = LinearDiscriminantAnalysis(solver="svd").fit(X, y)
    means_transformed = clf.transform(means)
    d1 = means_transformed[3] - means_transformed[0]
    d2 = means_transformed[2] - means_transformed[1]
    d1 /= np.sqrt(np.sum(d1**2))
    d2 /= np.sqrt(np.sum(d2**2))
    # the transformed within-class covariance should be the identity matrix
    assert_almost_equal(np.cov(clf.transform(scatter).T), np.eye(2))
    # the means of classes 0 and 3 should lie on the first component
    assert_almost_equal(np.abs(np.dot(d1[:2], [1, 0])), 1.0)
    # the means of classes 1 and 2 should lie on the second component
    assert_almost_equal(np.abs(np.dot(d2[:2], [0, 1])), 1.0)
def test_lda_scaling():
    """Classification is unaffected by wildly different feature scales."""
    n = 100
    rng = np.random.RandomState(1234)
    # Two non-overlapping uniform blobs, then rescale each feature differently.
    class_a = rng.uniform(-1, 1, (n, 3)) + [-10, 0, 0]
    class_b = rng.uniform(-1, 1, (n, 3)) + [10, 0, 0]
    features = np.vstack((class_a, class_b)) * [1, 100, 10000]
    labels = [-1] * n + [1] * n
    for solver in ("svd", "lsqr", "eigen"):
        clf = LinearDiscriminantAnalysis(solver=solver)
        # should be able to separate the data perfectly
        score = clf.fit(features, labels).score(features, labels)
        assert score == 1.0, "using covariance: %s" % solver
def test_lda_store_covariance():
    """Check the `store_covariance` parameter for every solver.

    For 'lsqr' and 'eigen' the `covariance_` attribute is always set,
    regardless of `store_covariance`. For 'svd' it is only computed when
    `store_covariance=True`.
    """
    # Test for solver 'lsqr' and 'eigen'
    # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
    for solver in ("lsqr", "eigen"):
        clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
        assert hasattr(clf, "covariance_")
        # Test the actual attribute:
        clf = LinearDiscriminantAnalysis(solver=solver, store_covariance=True).fit(
            X6, y6
        )
        assert hasattr(clf, "covariance_")
        assert_array_almost_equal(
            clf.covariance_, np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
        )
    # Test for SVD solver, the default is to not set the covariances_ attribute
    clf = LinearDiscriminantAnalysis(solver="svd").fit(X6, y6)
    assert not hasattr(clf, "covariance_")
    # Test the actual attribute:
    # Bug fix: this previously reused the leaked loop variable `solver`
    # (== "eigen"), so the SVD solver was never tested with
    # store_covariance=True.
    clf = LinearDiscriminantAnalysis(solver="svd", store_covariance=True).fit(X6, y6)
    assert hasattr(clf, "covariance_")
    assert_array_almost_equal(
        clf.covariance_, np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
    )
@pytest.mark.parametrize("seed", range(10))
def test_lda_shrinkage(seed):
    """`shrinkage=0.5` matches an explicit ShrunkCovariance estimator."""
    rng = np.random.RandomState(seed)
    features = rng.rand(100, 10)
    labels = rng.randint(3, size=(100))
    via_param = LinearDiscriminantAnalysis(
        store_covariance=True, shrinkage=0.5, solver="lsqr"
    )
    via_estimator = LinearDiscriminantAnalysis(
        store_covariance=True,
        covariance_estimator=ShrunkCovariance(shrinkage=0.5),
        solver="lsqr",
    )
    via_param.fit(features, labels)
    via_estimator.fit(features, labels)
    assert_allclose(via_param.means_, via_estimator.means_)
    assert_allclose(via_param.covariance_, via_estimator.covariance_)
def test_lda_ledoitwolf():
    """`shrinkage="auto"` equals Ledoit-Wolf estimation on standardized data,
    rescaled back to the original feature scales."""

    class StandardizedLedoitWolf:
        def fit(self, X):
            scaler = StandardScaler()  # standardize features
            shrunk = ledoit_wolf(scaler.fit_transform(X))[0]
            # undo the standardization scaling
            self.covariance_ = (
                scaler.scale_[:, np.newaxis] * shrunk * scaler.scale_[np.newaxis, :]
            )

    rng = np.random.RandomState(0)
    features = rng.rand(100, 10)
    labels = rng.randint(3, size=(100,))
    auto_shrinkage = LinearDiscriminantAnalysis(
        store_covariance=True, shrinkage="auto", solver="lsqr"
    )
    explicit = LinearDiscriminantAnalysis(
        store_covariance=True,
        covariance_estimator=StandardizedLedoitWolf(),
        solver="lsqr",
    )
    auto_shrinkage.fit(features, labels)
    explicit.fit(features, labels)
    assert_allclose(auto_shrinkage.means_, explicit.means_)
    assert_allclose(auto_shrinkage.covariance_, explicit.covariance_)
@pytest.mark.parametrize("n_features", [3, 5])
@pytest.mark.parametrize("n_classes", [5, 3])
def test_lda_dimension_warning(n_classes, n_features):
    """`n_components` above min(n_features, n_classes - 1) must raise."""
    rng = check_random_state(0)
    n_samples = 10
    features = rng.randn(n_samples, n_features)
    # n_classes labels obtained by repeating range(n_classes) up to n_samples.
    labels = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)
    # Valid values (including the default None) fit without error.
    for n_components in [max_components - 1, None, max_components]:
        LinearDiscriminantAnalysis(n_components=n_components).fit(features, labels)
    # One above the maximum, and a value larger than both bounds, must raise.
    # This covers any value of n_components beyond the limit.
    for n_components in [max_components + 1, max(n_features, n_classes - 1) + 1]:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        with pytest.raises(ValueError, match="n_components cannot be larger than "):
            lda.fit(features, labels)
@pytest.mark.parametrize(
    "data_type, expected_type",
    [
        (np.float32, np.float32),
        (np.float64, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
    ],
)
def test_lda_dtype_match(data_type, expected_type):
    """`coef_` dtype follows the input dtype; integers promote to float64."""
    for solver, shrinkage in solver_shrinkage:
        model = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        model.fit(X.astype(data_type), y.astype(data_type))
        assert model.coef_.dtype == expected_type
def test_lda_numeric_consistency_float32_float64():
    """float32 and float64 fits must agree on the coefficients."""
    for solver, shrinkage in solver_shrinkage:
        coefs = {}
        for dtype in (np.float32, np.float64):
            model = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
            model.fit(X.astype(dtype), y.astype(dtype))
            coefs[dtype] = model.coef_
        # Check value consistency between types
        assert_allclose(coefs[np.float32], coefs[np.float64], rtol=1e-6)
def test_qda():
    """QDA fit/predict on a small separable toy dataset."""
    clf = QuadraticDiscriminantAnalysis()
    assert_array_equal(clf.fit(X6, y6).predict(X6), y6)
    # Also works with a single feature.
    pred_1d = clf.fit(X7, y6).predict(X7)
    assert_array_equal(pred_1d, y6)
    # Probability estimates are consistent with the hard predictions.
    proba_1d = clf.predict_proba(X7)
    assert_array_equal((proba_1d[:, 1] > 0.5) + 1, y6)
    log_proba_1d = clf.predict_log_proba(X7)
    assert_array_almost_equal(np.exp(log_proba_1d), proba_1d, 8)
    # QDA shouldn't be able to separate the shuffled labels y7.
    assert np.any(clf.fit(X6, y7).predict(X6) != y7)
    # Classes should have at least 2 elements
    with pytest.raises(ValueError):
        clf.fit(X6, y4)
def test_qda_priors():
    """A near-zero prior on class 1 pushes predictions towards class 2."""
    default_pred = QuadraticDiscriminantAnalysis().fit(X6, y6).predict(X6)
    eps = 1e-10
    skewed = QuadraticDiscriminantAnalysis(priors=np.array([eps, 1 - eps]))
    skewed_pred = skewed.fit(X6, y6).predict(X6)
    assert np.sum(skewed_pred == 2) > np.sum(default_pred == 2)
@pytest.mark.parametrize("priors_type", ["list", "tuple", "array"])
def test_qda_prior_type(priors_type):
    """Check that priors accept array-like."""
    expected = [0.5, 0.5]
    container = _convert_container([0.5, 0.5], priors_type)
    clf = QuadraticDiscriminantAnalysis(priors=container).fit(X6, y6)
    assert isinstance(clf.priors_, np.ndarray)
    assert_array_equal(clf.priors_, expected)
def test_qda_prior_copy():
    """Mutating the `priors` array after `fit` must not affect `priors_`."""
    priors = np.array([0.5, 0.5])
    qda = QuadraticDiscriminantAnalysis(priors=priors).fit(X, y)
    # `priors_` starts out equal to the supplied priors...
    assert_array_equal(qda.priors_, qda.priors)
    # ...and an in-place change to the user array leaves `priors_` intact.
    priors[0] = 0.2
    assert qda.priors_[0] != qda.priors[0]
def test_qda_store_covariance():
    """`covariance_` only exists when `store_covariance=True`."""
    # Default: the attribute is not set.
    assert not hasattr(QuadraticDiscriminantAnalysis().fit(X6, y6), "covariance_")
    # With store_covariance=True the per-class covariances are stored.
    clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X6, y6)
    assert hasattr(clf, "covariance_")
    expected = [
        np.array([[0.7, 0.45], [0.45, 0.7]]),
        np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]),
    ]
    for cov, ref in zip(clf.covariance_, expected):
        assert_array_almost_equal(cov, ref)
@pytest.mark.xfail(
    _IS_WASM,
    reason=(
        "no floating point exceptions, see"
        " https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881"
    ),
)
def test_qda_regularization():
    """QDA warns on collinear/constant features, and `reg_param` fixes the
    divide-by-zero otherwise seen at predict time."""
    # The default is reg_param=0. and will cause issues when there is a
    # constant variable.
    # Fitting on data with constant variable triggers an UserWarning.
    collinear_msg = "Variables are collinear"
    clf = QuadraticDiscriminantAnalysis()
    with pytest.warns(UserWarning, match=collinear_msg):
        y_pred = clf.fit(X2, y6)
    # XXX: RuntimeWarning is also raised at predict time because of divisions
    # by zero when the model is fit with a constant feature and without
    # regularization: should this be considered a bug? Either by the fit-time
    # message more informative, raising and exception instead of a warning in
    # this case or somehow changing predict to avoid division by zero.
    with pytest.warns(RuntimeWarning, match="divide by zero"):
        y_pred = clf.predict(X2)
    assert np.any(y_pred != y6)
    # Adding a little regularization fixes the division by zero at predict
    # time. But UserWarning will persist at fit time.
    clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with pytest.warns(UserWarning, match=collinear_msg):
        clf.fit(X2, y6)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y6)
    # UserWarning should also be there for the n_samples_in_a_class <
    # n_features case.
    clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with pytest.warns(UserWarning, match=collinear_msg):
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
def test_covariance():
    """The `_cov` helper returns symmetric matrices for both estimators."""
    data, _ = make_blobs(n_samples=100, n_features=5, centers=1, random_state=42)
    # Mix the features so they become correlated.
    n_feat = data.shape[1]
    data = np.dot(data, np.arange(n_feat**2).reshape(n_feat, n_feat))
    for estimator in ("empirical", "auto"):
        cov = _cov(data, estimator)
        assert_almost_equal(cov, cov.T)
@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
def test_raises_value_error_on_same_number_of_classes_and_samples(solver):
    """A ValueError is raised when n_samples equals n_classes."""
    features = np.array([[0.5, 0.6], [0.6, 0.5]])
    labels = np.array(["a", "b"])
    clf = LinearDiscriminantAnalysis(solver=solver)
    with pytest.raises(ValueError, match="The number of samples must be more"):
        clf.fit(features, labels)
def test_get_feature_names_out():
    """Check get_feature_names_out uses class name as prefix."""
    est = LinearDiscriminantAnalysis().fit(X, y)
    prefix = "LinearDiscriminantAnalysis".lower()
    n_out = est.explained_variance_ratio_.shape[0]
    expected = np.array([f"{prefix}{i}" for i in range(n_out)], dtype=object)
    assert_array_equal(est.get_feature_names_out(), expected)
@@ -0,0 +1,333 @@
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Raghav RV <rvraghav93@gmail.com>
# License: BSD 3 clause
import importlib
import inspect
import os
import warnings
from inspect import signature
from pkgutil import walk_packages
import numpy as np
import pytest
import sklearn
from sklearn.datasets import make_classification
# make it possible to discover experimental estimators when calling `all_estimators`
from sklearn.experimental import (
enable_halving_search_cv, # noqa
enable_iterative_imputer, # noqa
)
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
from sklearn.utils import all_estimators
from sklearn.utils._testing import (
_get_func_name,
check_docstring_parameters,
ignore_warnings,
)
from sklearn.utils.deprecation import _is_deprecated
from sklearn.utils.estimator_checks import (
_construct_instance,
_enforce_estimator_tags_X,
_enforce_estimator_tags_y,
)
from sklearn.utils.fixes import _IS_PYPY, parse_version, sp_version
# walk_packages() ignores DeprecationWarnings, now we need to ignore
# FutureWarnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    # mypy error: Module has no attribute "__path__"
    sklearn_path = [os.path.dirname(sklearn.__file__)]
    # All importable public sklearn modules (private "._" and test modules
    # are excluded).
    PUBLIC_MODULES = set(
        [
            pckg[1]
            for pckg in walk_packages(prefix="sklearn.", path=sklearn_path)
            if not ("._" in pckg[1] or ".tests." in pckg[1])
        ]
    )
# functions to ignore args / docstring of
# TODO(1.7): remove "sklearn.utils._joblib"
_DOCSTRING_IGNORES = [
    "sklearn.utils.deprecation.load_mlcomp",
    "sklearn.pipeline.make_pipeline",
    "sklearn.pipeline.make_union",
    "sklearn.utils.extmath.safe_sparse_dot",
    "sklearn.utils._joblib",
    "HalfBinomialLoss",
]
# Methods where y param should be ignored if y=None by default
_METHODS_IGNORE_NONE_Y = [
    "fit",
    "score",
    "fit_predict",
    "fit_transform",
    "partial_fit",
    "predict",
]
# numpydoc 0.8.0's docscrape tool raises because of collections.abc under
# Python 3.7
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.skipif(_IS_PYPY, reason="test segfaults on PyPy")
def test_docstring_parameters():
    """Check that public classes/functions document parameters consistently
    with their signatures (requires numpydoc)."""
    # Test module docstring formatting
    # Skip test if numpydoc is not found
    pytest.importorskip(
        "numpydoc", reason="numpydoc is required to test the docstrings"
    )
    # XXX unreached code as of v0.22
    from numpydoc import docscrape

    incorrect = []
    for name in PUBLIC_MODULES:
        if name.endswith(".conftest"):
            # pytest tooling, not part of the scikit-learn API
            continue
        if name == "sklearn.utils.fixes":
            # We cannot always control these docstrings
            continue
        with warnings.catch_warnings(record=True):
            module = importlib.import_module(name)
        classes = inspect.getmembers(module, inspect.isclass)
        # Exclude non-scikit-learn classes
        classes = [cls for cls in classes if cls[1].__module__.startswith("sklearn")]
        for cname, cls in classes:
            this_incorrect = []
            if cname in _DOCSTRING_IGNORES or cname.startswith("_"):
                continue
            if inspect.isabstract(cls):
                continue
            with warnings.catch_warnings(record=True) as w:
                cdoc = docscrape.ClassDoc(cls)
                if len(w):
                    raise RuntimeError(
                        "Error for __init__ of %s in %s:\n%s" % (cls, name, w[0])
                    )
            # Skip checks on deprecated classes
            if _is_deprecated(cls.__new__):
                continue
            this_incorrect += check_docstring_parameters(cls.__init__, cdoc)
            for method_name in cdoc.methods:
                method = getattr(cls, method_name)
                if _is_deprecated(method):
                    continue
                param_ignore = None
                # Now skip docstring test for y when y is None
                # by default for API reason
                if method_name in _METHODS_IGNORE_NONE_Y:
                    sig = signature(method)
                    if "y" in sig.parameters and sig.parameters["y"].default is None:
                        param_ignore = ["y"]  # ignore y for fit and score
                result = check_docstring_parameters(method, ignore=param_ignore)
                this_incorrect += result
            incorrect += this_incorrect
        functions = inspect.getmembers(module, inspect.isfunction)
        # Exclude imported functions
        functions = [fn for fn in functions if fn[1].__module__ == name]
        for fname, func in functions:
            # Don't test private methods / functions
            if fname.startswith("_"):
                continue
            if fname == "configuration" and name.endswith("setup"):
                continue
            name_ = _get_func_name(func)
            if not any(d in name_ for d in _DOCSTRING_IGNORES) and not _is_deprecated(
                func
            ):
                incorrect += check_docstring_parameters(func)
    msg = "\n".join(incorrect)
    if len(incorrect) > 0:
        raise AssertionError("Docstring Error:\n" + msg)
def _construct_searchcv_instance(SearchCV):
    """Build a minimal *SearchCV instance with a trivial parameter grid."""
    param_grid = {"C": [0.1, 1]}
    return SearchCV(LogisticRegression(), param_grid)
def _construct_compose_pipeline_instance(Estimator):
# Minimal / degenerate instances: only useful to test the docstrings.
if Estimator.__name__ == "ColumnTransformer":
return Estimator(transformers=[("transformer", "passthrough", [0, 1])])
elif Estimator.__name__ == "Pipeline":
return Estimator(steps=[("clf", LogisticRegression())])
elif Estimator.__name__ == "FeatureUnion":
return Estimator(transformer_list=[("transformer", FunctionTransformer())])
def _construct_sparse_coder(Estimator):
# XXX: hard-coded assumption that n_features=3
dictionary = np.array(
[[0, 1, 0], [-1, -1, 2], [1, 1, 1], [0, 1, 1], [0, 2, 1]],
dtype=np.float64,
)
return Estimator(dictionary=dictionary)
@ignore_warnings(category=sklearn.exceptions.ConvergenceWarning)
# TODO(1.6): remove "@pytest.mark.filterwarnings" as SAMME.R will be removed
# and substituted with the SAMME algorithm as a default
@pytest.mark.filterwarnings("ignore:The SAMME.R algorithm")
@pytest.mark.parametrize("name, Estimator", all_estimators())
def test_fit_docstring_attributes(name, Estimator):
    """Check that every attribute documented in the estimator docstring exists
    after ``fit``, and that every fitted attribute is documented."""
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]
    # Some estimators need bespoke construction arguments.
    if Estimator.__name__ in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif Estimator.__name__ in (
        "ColumnTransformer",
        "Pipeline",
        "FeatureUnion",
    ):
        est = _construct_compose_pipeline_instance(Estimator)
    elif Estimator.__name__ == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)
    # Per-estimator parameter tweaks so that fitting the tiny dataset works.
    if Estimator.__name__ == "SelectKBest":
        est.set_params(k=2)
    elif Estimator.__name__ == "DummyClassifier":
        est.set_params(strategy="stratified")
    elif Estimator.__name__ == "CCA" or Estimator.__name__.startswith("PLS"):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif Estimator.__name__ in (
        "GaussianRandomProjection",
        "SparseRandomProjection",
    ):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)
    elif Estimator.__name__ == "TSNE":
        # default raises an error, perplexity must be less than n_samples
        est.set_params(perplexity=2)
    # TODO(1.6): remove (avoid FutureWarning)
    if Estimator.__name__ in ("NMF", "MiniBatchNMF"):
        est.set_params(n_components="auto")
    if Estimator.__name__ == "QuantileRegressor":
        solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"
        est.set_params(solver=solver)
    # Low max iter to speed up tests: we are only interested in checking the existence
    # of fitted attributes. This should be invariant to whether it has converged or not.
    if "max_iter" in est.get_params():
        est.set_params(max_iter=2)
        # min value for `TSNE` is 250
        if Estimator.__name__ == "TSNE":
            est.set_params(max_iter=250)
    if "random_state" in est.get_params():
        est.set_params(random_state=0)
    # In case we want to deprecate some attributes in the future
    skipped_attributes = {}
    if Estimator.__name__.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if Estimator.__name__ in (
            "CountVectorizer",
            "HashingVectorizer",
            "TfidfVectorizer",
        ):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif Estimator.__name__ == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        y = None
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )
        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_X(est, X)
    # Dispatch on the estimator's input-type tag to call fit appropriately.
    if "1dlabels" in est._get_tags()["X_types"]:
        est.fit(y)
    elif "2dlabels" in est._get_tags()["X_types"]:
        est.fit(np.c_[y, y])
    elif "3darray" in est._get_tags()["X_types"]:
        est.fit(X[np.newaxis, ...], y)
    else:
        est.fit(X, y)
    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)
    # Conversely: every fitted attribute must appear in the docstring.
    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def _get_all_fitted_attributes(estimator):
"Get all the fitted attributes of an estimator including properties"
# attributes
fit_attr = list(estimator.__dict__.keys())
# properties
with warnings.catch_warnings():
warnings.filterwarnings("error", category=FutureWarning)
for name in dir(estimator.__class__):
obj = getattr(estimator.__class__, name)
if not isinstance(obj, property):
continue
# ignore properties that raises an AttributeError and deprecated
# properties
try:
getattr(estimator, name)
except (AttributeError, FutureWarning):
continue
fit_attr.append(name)
return [k for k in fit_attr if k.endswith("_") and not k.startswith("_")]
@@ -0,0 +1,208 @@
import re
from inspect import signature
from typing import Optional
import pytest
# make it possible to discover experimental estimators when calling `all_estimators`
from sklearn.experimental import (
enable_halving_search_cv, # noqa
enable_iterative_imputer, # noqa
)
from sklearn.utils.discovery import all_displays, all_estimators, all_functions
numpydoc_validation = pytest.importorskip("numpydoc.validate")
def get_all_methods():
    """Yield (Klass, method) pairs for every public estimator and display.

    ``method`` is ``None`` once per class so that the class docstring itself
    also gets validated.
    """
    estimators = all_estimators()
    displays = all_displays()
    for name, Klass in estimators + displays:
        if name.startswith("_"):
            # skip private classes
            continue
        methods = []
        # FIX: use a distinct loop variable; the original reused `name`,
        # shadowing the class name bound in the outer loop.
        for attr_name in dir(Klass):
            if attr_name.startswith("_"):
                continue
            method_obj = getattr(Klass, attr_name)
            if hasattr(method_obj, "__call__") or isinstance(method_obj, property):
                methods.append(attr_name)
        # `None` stands for the class docstring itself.
        methods.append(None)

        for method in sorted(methods, key=str):
            yield Klass, method
def get_all_functions_names():
    """Yield the fully qualified name of every discovered public function."""
    for _, func in all_functions():
        # Exclude functions from utils.fixes since they come from external
        # packages.
        if "utils.fixes" in func.__module__:
            continue
        yield f"{func.__module__}.{func.__name__}"
def filter_errors(errors, method, Klass=None):
    """Drop the numpydoc error codes scikit-learn chooses to ignore.

    These rules are specific for scikit-learn."""
    # Codes ignored everywhere:
    # - RT02: The first line of the Returns section should contain only the
    #   type (we may need to refer to the name of the returned object)
    # - GL01: Docstring text (summary) should start in the line immediately
    #   after the opening quotes
    # - GL02: If there's a blank line, it should be before the first line of
    #   the Returns section, not after (allows short property docstrings)
    always_ignored = {"RT02", "GL01", "GL02"}
    # Codes only enforced on the top level class docstrings:
    # - ES01: No extended summary found
    # - SA01: See Also section not found
    # - EX01: No examples section found
    class_only = ("EX01", "SA01", "ES01")
    for code, message in errors:
        if code in always_ignored:
            continue
        # Ignore PR02: Unknown parameters for properties. We sometimes use
        # properties for ducktyping, i.e. SGDClassifier.predict_proba
        # Ignore GL08: Parsing of the method signature failed, possibly
        # because this is a property. Properties are sometimes used for
        # deprecated attributes and the attribute is already documented
        # in the class docstring.
        #
        # All error codes:
        # https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks
        if code in ("PR02", "GL08") and Klass is not None and method is not None:
            if isinstance(getattr(Klass, method), property):
                continue
        if method is not None and code in class_only:
            continue
        yield code, message
def repr_errors(res, Klass=None, method: Optional[str] = None) -> str:
    """Pretty print original docstring and the obtained errors

    Parameters
    ----------
    res : dict
        result of numpydoc.validate.validate
    Klass : {Estimator, Display, None}
        estimator object or None
    method : str
        if estimator is not None, either the method name or None.

    Returns
    -------
    str
        String representation of the error.

    Raises
    ------
    ValueError
        If both `Klass` and `method` are None.
    """
    if method is None:
        # FIX: check `Klass is None` first. `hasattr(None, "__init__")` is
        # True, so checking hasattr before the None-check (as the original
        # did) made this ValueError unreachable.
        if Klass is None:
            raise ValueError("At least one of Klass, method should be provided")
        elif hasattr(Klass, "__init__"):
            method = "__init__"
        else:
            raise NotImplementedError

    if Klass is not None:
        obj = getattr(Klass, method)
        try:
            obj_signature = str(signature(obj))
        except TypeError:
            # In particular we can't parse the signature of properties
            obj_signature = (
                "\nParsing of the method signature failed, "
                "possibly because this is a property."
            )

        obj_name = Klass.__name__ + "." + method
    else:
        obj_signature = ""
        obj_name = method

    msg = "\n\n" + "\n\n".join(
        [
            str(res["file"]),
            obj_name + obj_signature,
            res["docstring"],
            "# Errors",
            "\n".join(
                " - {}: {}".format(code, message) for code, message in res["errors"]
            ),
        ]
    )
    return msg
@pytest.mark.parametrize("function_name", get_all_functions_names())
def test_function_docstring(function_name, request):
"""Check function docstrings using numpydoc."""
res = numpydoc_validation.validate(function_name)
res["errors"] = list(filter_errors(res["errors"], method="function"))
if res["errors"]:
msg = repr_errors(res, method=f"Tested function: {function_name}")
raise ValueError(msg)
@pytest.mark.parametrize("Klass, method", get_all_methods())
def test_docstring(Klass, method, request):
base_import_path = Klass.__module__
import_path = [base_import_path, Klass.__name__]
if method is not None:
import_path.append(method)
import_path = ".".join(import_path)
res = numpydoc_validation.validate(import_path)
res["errors"] = list(filter_errors(res["errors"], method, Klass=Klass))
if res["errors"]:
msg = repr_errors(res, Klass, method)
raise ValueError(msg)
if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(description="Validate docstring with numpydoc.")
parser.add_argument("import_path", help="Import path to validate")
args = parser.parse_args()
res = numpydoc_validation.validate(args.import_path)
import_path_sections = args.import_path.split(".")
# When applied to classes, detect class method. For functions
# method = None.
# TODO: this detection can be improved. Currently we assume that we have
# class # methods if the second path element before last is in camel case.
if len(import_path_sections) >= 2 and re.match(
r"(?:[A-Z][a-z]*)+", import_path_sections[-2]
):
method = import_path_sections[-1]
else:
method = None
res["errors"] = list(filter_errors(res["errors"], method))
if res["errors"]:
msg = repr_errors(res, method=args.import_path)
print(msg)
sys.exit(1)
else:
print("All docstring checks passed for {}!".format(args.import_path))
@@ -0,0 +1,710 @@
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.base import clone
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.exceptions import NotFittedError
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
from sklearn.utils.fixes import CSC_CONTAINERS
from sklearn.utils.stats import _weighted_percentile
@ignore_warnings
def _check_predict_proba(clf, X, y):
    """Check consistency of predict_proba and predict_log_proba outputs."""
    proba = clf.predict_proba(X)
    # We know that we can have division by zero
    log_proba = clf.predict_log_proba(X)

    y = np.atleast_1d(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    n_outputs = y.shape[1]
    n_samples = len(X)

    if n_outputs == 1:
        # Normalize to the multi-output representation.
        proba, log_proba = [proba], [log_proba]

    for k in range(n_outputs):
        assert proba[k].shape == (n_samples, len(np.unique(y[:, k])))
        assert_array_almost_equal(proba[k].sum(axis=1), np.ones(n_samples))
        # We know that we can have division by zero
        assert_array_almost_equal(np.log(proba[k]), log_proba[k])
def _check_behavior_2d(clf):
    """Fitting on 1d and 2d targets must predict with matching shapes."""
    X = np.array([[0], [0], [0], [0]])  # ignored
    targets = (
        np.array([1, 2, 1, 1]),  # 1d case
        np.array([[1, 0], [2, 0], [1, 0], [1, 3]]),  # 2d case
    )
    for y in targets:
        estimator = clone(clf)
        estimator.fit(X, y)
        assert y.shape == estimator.predict(X).shape
def _check_behavior_2d_for_constant(clf):
    """Prediction shape must match a 2d multi-output constant target."""
    # 2d case only
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([[1, 0, 5, 4, 3], [2, 0, 1, 2, 5], [1, 0, 4, 5, 2], [1, 3, 3, 2, 0]])
    fitted = clone(clf).fit(X, y)
    assert y.shape == fitted.predict(X).shape
def _check_equality_regressor(statistic, y_learn, y_pred_learn, y_test, y_pred_test):
assert_array_almost_equal(np.tile(statistic, (y_learn.shape[0], 1)), y_pred_learn)
assert_array_almost_equal(np.tile(statistic, (y_test.shape[0], 1)), y_pred_test)
def test_feature_names_in_and_n_features_in_(global_random_seed, n_samples=10):
    """Dummy estimators expose feature_names_in_ and n_features_in_."""
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(seed=global_random_seed)
    X = pd.DataFrame([[0]] * n_samples, columns=["feature_1"])
    y = rng.rand(n_samples)

    for Dummy in (DummyRegressor, DummyClassifier):
        fitted = Dummy().fit(X, y)
        assert hasattr(fitted, "feature_names_in_")
        assert hasattr(fitted, "n_features_in_")
def test_most_frequent_and_prior_strategy():
    """'most_frequent' and 'prior' both predict the majority class."""
    X = [[0], [0], [0], [0]]  # ignored
    y = [1, 2, 1, 1]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0).fit(X, y)
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        expected_proba = clf.class_prior_.reshape((1, -1))
        if strategy != "prior":
            # 'most_frequent' puts all probability mass on the majority class.
            expected_proba = expected_proba > 0.5
        assert_array_almost_equal(clf.predict_proba([X[0]]), expected_proba)
def test_most_frequent_and_prior_strategy_with_2d_column_y():
    """A column-vector y must behave exactly like the equivalent 1d y.

    non-regression test added in
    https://github.com/scikit-learn/scikit-learn/pull/13545
    """
    X = [[0], [0], [0], [0]]
    y_1d = [1, 2, 1, 1]
    y_2d = [[1], [2], [1], [1]]

    for strategy in ("most_frequent", "prior"):
        pred_1d = DummyClassifier(strategy=strategy, random_state=0).fit(X, y_1d).predict(X)
        pred_2d = DummyClassifier(strategy=strategy, random_state=0).fit(X, y_2d).predict(X)
        assert_array_equal(pred_1d, pred_2d)
def test_most_frequent_and_prior_strategy_multioutput():
    """Majority-class prediction works per output column."""
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])

    n_samples = len(X)
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    for strategy in ("prior", "most_frequent"):
        clf = DummyClassifier(strategy=strategy, random_state=0).fit(X, y)
        assert_array_equal(clf.predict(X), expected)
        _check_predict_proba(clf, X, y)
    _check_behavior_2d(clf)
def test_stratified_strategy(global_random_seed):
    """'stratified' draws labels according to the class distribution."""
    X = [[0]] * 5  # ignored
    y = [1, 2, 1, 1, 2]
    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X, y)

    X_large = [[0]] * 500
    frequencies = np.bincount(clf.predict(X_large)) / float(len(X_large))
    assert_almost_equal(frequencies[1], 3.0 / 5, decimal=1)
    assert_almost_equal(frequencies[2], 2.0 / 5, decimal=1)
    _check_predict_proba(clf, X_large, y)
def test_stratified_strategy_multioutput(global_random_seed):
    """Per-column class frequencies are reproduced for 2d targets."""
    X = [[0]] * 5  # ignored
    y = np.array([[2, 1], [2, 2], [1, 1], [1, 2], [1, 1]])
    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X, y)

    X_large = [[0]] * 500
    y_pred = clf.predict(X_large)
    for k in range(y.shape[1]):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X_large))
        assert_almost_equal(frequencies[1], 3.0 / 5, decimal=1)
        assert_almost_equal(frequencies[2], 2.0 / 5, decimal=1)
    _check_predict_proba(clf, X_large, y)
    _check_behavior_2d(clf)
def test_uniform_strategy(global_random_seed):
    """'uniform' predicts each class with equal probability."""
    X = [[0]] * 4  # ignored
    y = [1, 2, 1, 1]
    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    clf.fit(X, y)

    X_large = [[0]] * 500
    frequencies = np.bincount(clf.predict(X_large)) / float(len(X_large))
    for label in (1, 2):
        assert_almost_equal(frequencies[label], 0.5, decimal=1)
    _check_predict_proba(clf, X_large, y)
def test_uniform_strategy_multioutput(global_random_seed):
    """'uniform' predicts classes uniformly for every output column."""
    X = [[0]] * 4  # ignored
    y = np.array([[2, 1], [2, 2], [1, 2], [1, 1]])
    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    clf.fit(X, y)

    X_large = [[0]] * 500
    y_pred = clf.predict(X_large)
    for k in range(y.shape[1]):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X_large))
        for label in (1, 2):
            assert_almost_equal(frequencies[label], 0.5, decimal=1)
    _check_predict_proba(clf, X_large, y)
    _check_behavior_2d(clf)
def test_string_labels():
    """String class labels are supported by DummyClassifier."""
    X = [[0]] * 5
    y = ["paris", "paris", "tokyo", "amsterdam", "berlin"]
    clf = DummyClassifier(strategy="most_frequent").fit(X, y)
    assert_array_equal(clf.predict(X), ["paris"] * 5)
@pytest.mark.parametrize(
    "y,y_test",
    [
        ([2, 1, 1, 1], [2, 2, 1, 1]),
        (
            np.array([[2, 2], [1, 1], [1, 1], [1, 1]]),
            np.array([[2, 2], [2, 2], [1, 1], [1, 1]]),
        ),
    ],
)
def test_classifier_score_with_None(y, y_test):
    """`score` accepts X=None since the features are ignored anyway."""
    clf = DummyClassifier(strategy="most_frequent").fit(None, y)
    assert clf.score(None, y_test) == 0.5
@pytest.mark.parametrize(
    "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"]
)
def test_classifier_prediction_independent_of_X(strategy, global_random_seed):
    """Predictions depend only on y, never on the feature values."""
    y = [0, 2, 1, 1]
    predictions = []
    for X_value in (0, 1):
        X = [[X_value]] * 4
        clf = DummyClassifier(
            strategy=strategy, random_state=global_random_seed, constant=0
        )
        predictions.append(clf.fit(X, y).predict(X))
    assert_array_equal(predictions[0], predictions[1])
def test_mean_strategy_regressor(global_random_seed):
    """The default strategy predicts the mean of the training target."""
    rng = np.random.RandomState(seed=global_random_seed)
    X = [[0]] * 4  # ignored
    y = rng.randn(4)
    model = DummyRegressor().fit(X, y)
    assert_array_equal(model.predict(X), [np.mean(y)] * len(X))
def test_mean_strategy_multioutput_regressor(global_random_seed):
    """The mean strategy predicts the column-wise mean for 2d targets."""
    rng = np.random.RandomState(seed=global_random_seed)
    X_learn, y_learn = rng.randn(10, 10), rng.randn(10, 5)
    X_test, y_test = rng.randn(20, 10), rng.randn(20, 5)
    mean = np.mean(y_learn, axis=0).reshape((1, -1))

    # Correctness oracle
    est = DummyRegressor().fit(X_learn, y_learn)
    _check_equality_regressor(
        mean, y_learn, est.predict(X_learn), y_test, est.predict(X_test)
    )
    _check_behavior_2d(est)
def test_regressor_exceptions():
    """Predicting before fit raises NotFittedError."""
    with pytest.raises(NotFittedError):
        DummyRegressor().predict([])
def test_median_strategy_regressor(global_random_seed):
    """'median' strategy predicts the median of the training target."""
    rng = np.random.RandomState(seed=global_random_seed)
    X = [[0]] * 5  # ignored
    y = rng.randn(5)
    model = DummyRegressor(strategy="median").fit(X, y)
    assert_array_equal(model.predict(X), [np.median(y)] * len(X))
def test_median_strategy_multioutput_regressor(global_random_seed):
    """'median' strategy predicts the column-wise median for 2d targets."""
    rng = np.random.RandomState(seed=global_random_seed)
    X_learn, y_learn = rng.randn(10, 10), rng.randn(10, 5)
    X_test, y_test = rng.randn(20, 10), rng.randn(20, 5)
    median = np.median(y_learn, axis=0).reshape((1, -1))

    # Correctness oracle
    est = DummyRegressor(strategy="median").fit(X_learn, y_learn)
    _check_equality_regressor(
        median, y_learn, est.predict(X_learn), y_test, est.predict(X_test)
    )
    _check_behavior_2d(est)
def test_quantile_strategy_regressor(global_random_seed):
    """'quantile' strategy predicts the requested training quantile."""
    rng = np.random.RandomState(seed=global_random_seed)
    X = [[0]] * 5  # ignored
    y = rng.randn(5)

    expectations = [
        (0.5, np.median(y)),
        (0, np.min(y)),
        (1, np.max(y)),
        (0.3, np.percentile(y, q=30)),
    ]
    for quantile, expected in expectations:
        model = DummyRegressor(strategy="quantile", quantile=quantile).fit(X, y)
        assert_array_equal(model.predict(X), [expected] * len(X))
def test_quantile_strategy_multioutput_regressor(global_random_seed):
    """'quantile' strategy predicts column-wise quantiles for 2d targets."""
    rng = np.random.RandomState(seed=global_random_seed)
    X_learn, y_learn = rng.randn(10, 10), rng.randn(10, 5)
    X_test, y_test = rng.randn(20, 10), rng.randn(20, 5)

    expectations = [
        (0.5, np.median(y_learn, axis=0).reshape((1, -1))),
        (0.8, np.percentile(y_learn, axis=0, q=80).reshape((1, -1))),
    ]
    # Correctness oracle
    for quantile, expected in expectations:
        est = DummyRegressor(strategy="quantile", quantile=quantile)
        est.fit(X_learn, y_learn)
        _check_equality_regressor(
            expected, y_learn, est.predict(X_learn), y_test, est.predict(X_test)
        )
        _check_behavior_2d(est)
def test_quantile_invalid():
    """'quantile' strategy without a quantile value raises ValueError."""
    expected_msg = (
        "When using `strategy='quantile', you have to specify the desired quantile"
    )
    model = DummyRegressor(strategy="quantile", quantile=None)
    with pytest.raises(ValueError, match=expected_msg):
        model.fit([[0]] * 5, [0] * 5)
def test_quantile_strategy_empty_train():
    """Fitting 'quantile' on empty data raises IndexError."""
    model = DummyRegressor(strategy="quantile", quantile=0.4)
    with pytest.raises(IndexError):
        model.fit([], [])
def test_constant_strategy_regressor(global_random_seed):
    """'constant' strategy always predicts the provided constant."""
    rng = np.random.RandomState(seed=global_random_seed)
    X = [[0]] * 5  # ignored
    y = rng.randn(5)

    # Both a list and a scalar constant are accepted.
    for constant in ([43], 43):
        model = DummyRegressor(strategy="constant", constant=constant).fit(X, y)
        assert_array_equal(model.predict(X), [43] * len(X))

    # non-regression test for #22478
    assert not isinstance(model.constant, np.ndarray)
def test_constant_strategy_multioutput_regressor(global_random_seed):
    """'constant' strategy predicts one constant per output column."""
    rng = np.random.RandomState(seed=global_random_seed)
    X_learn, y_learn = rng.randn(10, 10), rng.randn(10, 5)
    # test with 2d array
    constants = rng.randn(5)
    X_test, y_test = rng.randn(20, 10), rng.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor(strategy="constant", constant=constants)
    est.fit(X_learn, y_learn)
    _check_equality_regressor(
        constants, y_learn, est.predict(X_learn), y_test, est.predict(X_test)
    )
    _check_behavior_2d_for_constant(est)
def test_y_mean_attribute_regressor():
    """`constant_` stores the training mean when strategy='mean'."""
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]
    fitted = DummyRegressor(strategy="mean").fit(X, y)
    assert fitted.constant_ == np.mean(y)
def test_constants_not_specified_regressor():
    """'constant' strategy without a constant raises TypeError."""
    model = DummyRegressor(strategy="constant")
    with pytest.raises(TypeError, match="Constant target value has to be specified"):
        model.fit([[0]] * 5, [1, 2, 4, 6, 8])
def test_constant_size_multioutput_regressor(global_random_seed):
    """A mis-sized constant for a multi-output target raises ValueError."""
    rng = np.random.RandomState(seed=global_random_seed)
    X, y = rng.randn(10, 10), rng.randn(10, 5)

    model = DummyRegressor(strategy="constant", constant=[1, 2, 3, 4])
    err_msg = r"Constant target value should have shape \(5, 1\)."
    with pytest.raises(ValueError, match=err_msg):
        model.fit(X, y)
def test_constant_strategy():
    """'constant' classifier always predicts the requested label."""
    X = [[0], [0], [0], [0]]  # ignored
    cases = [
        ([2, 1, 2, 2], 1, np.ones(len(X))),  # integer labels
        (["two", "one", "two", "two"], "one", np.array(["one"] * 4)),  # strings
    ]
    for y, constant, expected in cases:
        clf = DummyClassifier(strategy="constant", random_state=0, constant=constant)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), expected)
        _check_predict_proba(clf, X, y)
def test_constant_strategy_multioutput():
    """Multi-output 'constant' predicts one fixed label per column."""
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[2, 3], [1, 3], [2, 3], [2, 0]])

    n_samples = len(X)
    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    assert_array_equal(clf.predict(X), expected)
    _check_predict_proba(clf, X, y)
@pytest.mark.parametrize(
    "y, params, err_msg",
    [
        ([2, 1, 2, 2], {"random_state": 0}, "Constant.*has to be specified"),
        ([2, 1, 2, 2], {"constant": [2, 0]}, "Constant.*should have shape"),
        (
            np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]),
            {"constant": 2},
            "Constant.*should have shape",
        ),
        (
            [2, 1, 2, 2],
            {"constant": "my-constant"},
            "constant=my-constant.*Possible values.*\\[1, 2]",
        ),
        (
            np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]),
            {"constant": [2, "unknown"]},
            "constant=\\[2, 'unknown'].*Possible values.*\\[1, 2]",
        ),
    ],
    ids=[
        "no-constant",
        "too-many-constant",
        "not-enough-output",
        "single-output",
        "multi-output",
    ],
)
def test_constant_strategy_exceptions(y, params, err_msg):
    """Invalid 'constant' configurations raise informative ValueErrors."""
    clf = DummyClassifier(strategy="constant", **params)
    with pytest.raises(ValueError, match=err_msg):
        clf.fit([[0], [0], [0], [0]], y)
def test_classification_sample_weight():
    """`class_prior_` reflects the provided sample weights."""
    X = [[0], [0], [1]]
    y = [0, 1, 0]
    weights = [0.1, 1.0, 0.1]

    clf = DummyClassifier(strategy="stratified").fit(X, y, weights)
    assert_array_almost_equal(clf.class_prior_, [0.2 / 1.2, 1.0 / 1.2])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_constant_strategy_sparse_target(csc_container):
    """A sparse multilabel target yields sparse constant predictions."""
    X = [[0]] * 5  # ignored
    y = csc_container(np.array([[0, 1], [4, 0], [1, 1], [1, 4], [1, 1]]))

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert sp.issparse(y_pred)
    expected = np.hstack([np.ones((len(X), 1)), np.zeros((len(X), 1))])
    assert_array_equal(y_pred.toarray(), expected)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_uniform_strategy_sparse_target_warning(global_random_seed, csc_container):
    """'uniform' on a sparse target warns and predicts classes uniformly."""
    X = [[0]] * 5  # ignored
    y = csc_container(np.array([[2, 1], [2, 2], [1, 4], [4, 2], [1, 1]]))

    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    with pytest.warns(UserWarning, match="the uniform strategy would not save memory"):
        clf.fit(X, y)

    X_large = [[0]] * 500
    y_pred = clf.predict(X_large)
    for k in range(y.shape[1]):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X_large))
        for label in (1, 2, 4):
            assert_almost_equal(frequencies[label], 1 / 3, decimal=1)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_stratified_strategy_sparse_target(global_random_seed, csc_container):
    """'stratified' on a sparse target reproduces per-column frequencies."""
    X = [[0]] * 5  # ignored
    y = csc_container(np.array([[4, 1], [0, 0], [1, 1], [1, 4], [1, 1]]))

    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X, y)

    X_large = [[0]] * 500
    y_pred = clf.predict(X_large)
    assert sp.issparse(y_pred)
    y_pred = y_pred.toarray()

    for k in range(y.shape[1]):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X_large))
        assert_almost_equal(frequencies[1], 3.0 / 5, decimal=1)
        assert_almost_equal(frequencies[0], 1.0 / 5, decimal=1)
        assert_almost_equal(frequencies[4], 1.0 / 5, decimal=1)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_most_frequent_and_prior_strategy_sparse_target(csc_container):
    """Majority-class strategies return sparse predictions on sparse y."""
    X = [[0]] * 5  # ignored
    y = csc_container(np.array([[1, 0], [1, 3], [4, 0], [0, 1], [1, 0]]))

    expected = np.hstack([np.ones((len(X), 1)), np.zeros((len(X), 1))])
    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0).fit(X, y)
        y_pred = clf.predict(X)
        assert sp.issparse(y_pred)
        assert_array_equal(y_pred.toarray(), expected)
def test_dummy_regressor_sample_weight(global_random_seed, n_samples=10):
    """Sample weights are honored by mean, median and quantile strategies."""
    rng = np.random.RandomState(seed=global_random_seed)
    X = [[0]] * n_samples
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)

    fitted = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert fitted.constant_ == np.average(y, weights=sample_weight)

    fitted = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert fitted.constant_ == _weighted_percentile(y, sample_weight, 50.0)

    fitted = DummyRegressor(strategy="quantile", quantile=0.95).fit(X, y, sample_weight)
    assert fitted.constant_ == _weighted_percentile(y, sample_weight, 95.0)
def test_dummy_regressor_on_3D_array():
    """A 3D (even non-numeric) X is accepted since features are ignored."""
    X = np.array([[["foo"]], [["bar"]], [["baz"]]])
    y = np.array([2, 2, 2])
    model = DummyRegressor().fit(X, y)
    assert_array_equal(model.predict(X), np.array([2, 2, 2]))
def test_dummy_classifier_on_3D_array():
    """A 3D (even non-numeric) X is accepted since features are ignored."""
    X = np.array([[["foo"]], [["bar"]], [["baz"]]])
    y = [2, 2, 2]
    clf = DummyClassifier(strategy="stratified").fit(X, y)
    assert_array_equal(clf.predict(X), [2, 2, 2])
    # A single class means probability one everywhere.
    assert_array_equal(clf.predict_proba(X), [[1], [1], [1]])
def test_dummy_regressor_return_std():
    """predict(return_std=True) returns (predictions, zero std)."""
    X = [[0]] * 3  # ignored
    y = np.array([2, 2, 2])
    model = DummyRegressor().fit(X, y)
    prediction = model.predict(X, return_std=True)
    # there should be two elements when return_std is True
    assert len(prediction) == 2
    # the second element should be all zeros
    assert_array_equal(prediction[1], np.array([0, 0, 0]))
@pytest.mark.parametrize(
    "y,y_test",
    [
        ([1, 1, 1, 2], [1.25] * 4),
        (np.array([[2, 2], [1, 1], [1, 1], [1, 1]]), [[1.25, 1.25]] * 4),
    ],
)
def test_regressor_score_with_None(y, y_test):
    """`score` accepts X=None since the features are ignored anyway."""
    model = DummyRegressor().fit(None, y)
    assert model.score(None, y_test) == 1.0
@pytest.mark.parametrize("strategy", ["mean", "median", "quantile", "constant"])
def test_regressor_prediction_independent_of_X(strategy):
    """Predictions depend only on y, never on the feature values."""
    y = [0, 2, 1, 1]
    predictions = []
    for X_value in (0, 1):
        X = [[X_value]] * 4
        model = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
        predictions.append(model.fit(X, y).predict(X))
    assert_array_equal(predictions[0], predictions[1])
@pytest.mark.parametrize(
    "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"]
)
def test_dtype_of_classifier_probas(strategy):
    """predict_proba always returns float64 probabilities."""
    y = [0, 2, 1, 1]
    X = np.zeros(4)
    clf = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    assert clf.fit(X, y).predict_proba(X).dtype == np.float64
@@ -0,0 +1,20 @@
# Basic unittests to test functioning of module's top-level
__author__ = "Yaroslav Halchenko"
__license__ = "BSD"
try:
from sklearn import * # noqa
_top_import_error = None
except Exception as e:
_top_import_error = e
def test_import_skl():
    """The module-level `from sklearn import *` must have succeeded.

    "import *" is discouraged outside of the module level, hence we rely on
    the `_top_import_error` variable populated above.
    """
    assert _top_import_error is None
@@ -0,0 +1,702 @@
import copy
import pickle
import warnings
import numpy as np
import pytest
from scipy.special import expit
import sklearn
from sklearn.datasets import make_regression
from sklearn.isotonic import (
IsotonicRegression,
_make_unique,
check_increasing,
isotonic_regression,
)
from sklearn.utils import shuffle
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.validation import check_array
def test_permutation_invariance():
    """Fit must be invariant to a joint permutation of x, y and weights.

    Regression test of missing sorting of sample-weights.
    """
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [1, 41, 51, 1, 2, 5, 24]
    sample_weight = [1, 2, 3, 4, 5, 6, 7]
    x_s, y_s, sw_s = shuffle(x, y, sample_weight, random_state=0)

    ir = IsotonicRegression()
    y_transformed = ir.fit_transform(x, y, sample_weight=sample_weight)
    y_transformed_s = ir.fit(x_s, y_s, sample_weight=sw_s).transform(x)
    assert_array_equal(y_transformed, y_transformed_s)
def test_check_increasing_small_number_of_samples():
    """With few samples, check_increasing returns True without warning."""
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        assert check_increasing([0, 1, 2], [1, 1.1, 1.05])
def test_check_increasing_up():
    """An increasing trend is detected without warnings."""
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1.5, 2.77, 8.99, 8.99, 50]

    # Check that we got increasing=True and no warnings
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        assert check_increasing(x, y)
def test_check_increasing_up_extreme():
    """A strictly increasing sequence is detected without warnings."""
    x = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=True and no warnings
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        assert check_increasing(x, [0, 1, 2, 3, 4, 5])
def test_check_increasing_down():
    """A decreasing trend is detected without warnings."""
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1.5, -2.77, -8.99, -8.99, -50]

    # Check that we got increasing=False and no warnings
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        assert not check_increasing(x, y)
def test_check_increasing_down_extreme():
    """A strictly decreasing sequence is detected without warnings."""
    x = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=False and no warnings
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        assert not check_increasing(x, [0, -1, -2, -3, -4, -5])
def test_check_ci_warn():
    """Noisy data triggers the confidence-interval warning."""
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, 2, -3, 4, -5]

    # Check that we got increasing=False and CI interval warning
    with pytest.warns(UserWarning, match="interval"):
        assert not check_increasing(x, y)
def test_isotonic_regression():
    """Basic correctness of isotonic_regression and IsotonicRegression."""
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    assert_array_equal(np.array([3, 6, 6, 8, 8, 8, 10]), isotonic_regression(y))

    y = np.array([10, 0, 2])
    assert_array_equal(np.array([4, 4, 4]), isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]), ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
def test_isotonic_regression_ties_min():
    """Ties at the minimum x are averaged."""
    x = [1, 1, 2, 3, 4, 5]
    y = [1, 2, 3, 4, 5, 6]
    expected = [1.5, 1.5, 3, 4, 5, 6]

    # Check that we get identical results for fit/transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(expected, ir.fit_transform(x, y))
def test_isotonic_regression_ties_max():
    """Ties at the maximum x are averaged."""
    x = [1, 2, 3, 4, 5, 5]
    y = [1, 2, 3, 4, 5, 6]
    expected = [1, 2, 3, 4, 5.5, 5.5]

    # Check that we get identical results for fit/transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(expected, ir.fit_transform(x, y))
def test_isotonic_regression_ties_secondary_():
    """
    Test isotonic regression fit, transform and fit_transform
    against the "secondary" ties method and "pituitary" data from R
    "isotone" package, as detailed in: J. d. Leeuw, K. Hornik, P. Mair,
    Isotone Optimization in R: Pool-Adjacent-Violators Algorithm
    (PAVA) and Active Set Methods

    Set values based on pituitary example and
    the following R command detailed in the paper above:
    > library("isotone")
    > data("pituitary")
    > res1 <- gpava(pituitary$age, pituitary$size, ties="secondary")
    > res1$x

    `isotone` version: 1.0-2, 2014-09-07
    R version: R version 3.1.1 (2014-07-10)
    """
    x = [8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14]
    y = [21, 23.5, 23, 24, 21, 25, 21.5, 22, 19, 23.5, 25]
    y_true = [22.22222] * 9 + [24.25, 24.25]

    # Check fit, transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_almost_equal(ir.transform(x), y_true, 4)
    assert_array_almost_equal(ir.fit_transform(x, y), y_true, 4)
def test_isotonic_regression_with_ties_in_differently_sized_groups():
    """
    Non-regression test to handle issue 9432:
    https://github.com/scikit-learn/scikit-learn/issues/9432

    Compare against output in R:
    > library("isotone")
    > x <- c(0, 1, 1, 2, 3, 4)
    > y <- c(0, 0, 1, 0, 0, 1)
    > res1 <- gpava(x, y, ties="secondary")
    > res1$x

    `isotone` version: 1.1-0, 2015-07-24
    R version: R version 3.3.2 (2016-10-31)
    """
    x = np.array([0, 1, 1, 2, 3, 4])
    y = np.array([0, 0, 1, 0, 0, 1])
    expected = np.array([0.0, 0.25, 0.25, 0.25, 0.25, 1.0])

    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_almost_equal(ir.transform(x), expected)
    assert_array_almost_equal(ir.fit_transform(x, y), expected)
def test_isotonic_regression_reversed():
    """Fitted values must be non-increasing when increasing=False."""
    targets = np.array([10, 9, 10, 7, 6, 6.1, 5])
    positions = np.arange(len(targets))
    fitted = IsotonicRegression(increasing=False).fit_transform(positions, targets)
    # Every consecutive difference must be >= 0 going left to right.
    assert_array_equal(np.ones(fitted[:-1].shape), ((fitted[:-1] - fitted[1:]) >= 0))
def test_isotonic_regression_auto_decreasing():
    """With increasing="auto", a decreasing trend must be detected."""
    targets = np.array([10, 9, 10, 7, 6, 6.1, 5])
    grid = np.arange(len(targets))
    model = IsotonicRegression(increasing="auto")
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        fitted = model.fit_transform(grid, targets)
    # work-around for pearson divide warnings in scipy <= 0.17.0
    assert all("invalid value encountered in " in str(warn.message) for warn in caught)
    # The fitted relationship must not increase from end to end.
    assert not fitted[0] < fitted[-1]
def test_isotonic_regression_auto_increasing():
    # Set y and x for an *increasing* trend (the original comment said
    # "decreasing" — a copy-paste slip from the sister test above).
    y = np.array([5, 6.1, 6, 7, 10, 9, 10])
    x = np.arange(len(y))
    # Create model and fit_transform
    ir = IsotonicRegression(increasing="auto")
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        y_ = ir.fit_transform(x, y)
        # work-around for pearson divide warnings in scipy <= 0.17.0
        assert all(["invalid value encountered in " in str(warn.message) for warn in w])
    # Check that relationship increases
    is_increasing = y_[0] < y_[-1]
    assert is_increasing
def test_assert_raises_exceptions():
    """Invalid inputs to fit/transform must raise informative ValueErrors."""
    model = IsotonicRegression()
    rng = np.random.RandomState(42)
    length_msg = "Found input variables with inconsistent numbers of samples"
    # sample_weight shorter than X / y
    with pytest.raises(ValueError, match=length_msg):
        model.fit([0, 1, 2], [5, 7, 3], [0.1, 0.6])
    # y shorter than X
    with pytest.raises(ValueError, match=length_msg):
        model.fit([0, 1, 2], [5, 7])
    # 2d X with several features is rejected in fit...
    with pytest.raises(ValueError, match="X should be a 1d array"):
        model.fit(rng.randn(3, 10), [0, 1, 2])
    # ...and in transform
    with pytest.raises(
        ValueError, match="Isotonic regression input X should be a 1d array"
    ):
        model.transform(rng.randn(3, 10))
def test_isotonic_sample_weight_parameter_default_value():
    """Omitting sample_weight must be equivalent to passing unit weights."""
    model = IsotonicRegression()
    # random test data
    rng = np.random.RandomState(42)
    n = 100
    x = np.arange(n)
    y = rng.randint(-50, 50, size=(n,)) + 50.0 * np.log(1 + np.arange(n))
    # fitting with explicit unit weights and with the default must agree
    unit_weights = np.ones(n)
    with_weights = model.fit_transform(x, y, sample_weight=unit_weights)
    without_weights = model.fit_transform(x, y)
    assert_array_equal(with_weights, without_weights)
def test_isotonic_min_max_boundaries():
    """Fitted values must be clipped into the [y_min, y_max] interval."""
    model = IsotonicRegression(y_min=2, y_max=4)
    n = 6
    x = np.arange(n)
    y = np.arange(n)
    # values below 2 clip up, values above 4 clip down
    expected = [2, 2, 2, 3, 4, 4]
    rounded = np.round(model.fit_transform(x, y))
    assert_array_equal(rounded, expected)
def test_isotonic_sample_weight():
    """Weighted PAVA must reproduce the hand-computed pooled solution."""
    model = IsotonicRegression()
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [1, 41, 51, 1, 2, 5, 24]
    weights = [1, 2, 3, 4, 5, 6, 7]
    # middle block pools to the weighted mean 13.95
    expected = [1, 13.95, 13.95, 13.95, 13.95, 13.95, 24]
    fitted = model.fit_transform(x, y, sample_weight=weights)
    assert_array_equal(expected, fitted)
def test_isotonic_regression_oob_raise():
    """out_of_bounds="raise" must reject queries outside the fitted range."""
    targets = np.array([3, 7, 5, 9, 8, 7, 10])
    grid = np.arange(len(targets))
    model = IsotonicRegression(increasing="auto", out_of_bounds="raise")
    model.fit(grid, targets)
    # Querying outside the training interval must raise a ValueError.
    with pytest.raises(ValueError, match="in x_new is below the interpolation range"):
        model.predict([min(grid) - 10, max(grid) + 10])
def test_isotonic_regression_oob_clip():
    """out_of_bounds="clip" must clamp predictions to the fitted min/max."""
    targets = np.array([3, 7, 5, 9, 8, 7, 10])
    grid = np.arange(len(targets))
    model = IsotonicRegression(increasing="auto", out_of_bounds="clip")
    model.fit(grid, targets)
    # Out-of-range queries should land exactly on the training extremes.
    outside = model.predict([min(grid) - 10, max(grid) + 10])
    inside = model.predict(grid)
    assert max(outside) == max(inside)
    assert min(outside) == min(inside)
def test_isotonic_regression_oob_nan():
    """out_of_bounds="nan" must return NaN for out-of-range queries."""
    targets = np.array([3, 7, 5, 9, 8, 7, 10])
    grid = np.arange(len(targets))
    model = IsotonicRegression(increasing="auto", out_of_bounds="nan")
    model.fit(grid, targets)
    # Both out-of-range queries (below min, above max) come back as NaN.
    outside = model.predict([min(grid) - 10, max(grid) + 10])
    assert sum(np.isnan(outside)) == 2
def test_isotonic_regression_pickle():
    """A fitted model must survive a pickle round-trip unchanged."""
    targets = np.array([3, 7, 5, 9, 8, 7, 10])
    grid = np.arange(len(targets))
    model = IsotonicRegression(increasing="auto", out_of_bounds="clip")
    model.fit(grid, targets)
    restored = pickle.loads(pickle.dumps(model, pickle.HIGHEST_PROTOCOL))
    # Original and unpickled models must predict identically.
    np.testing.assert_array_equal(model.predict(grid), restored.predict(grid))
def test_isotonic_duplicate_min_entry():
    """Duplicated minimum x values must not yield non-finite predictions."""
    x = [0, 0, 1]
    y = [0, 0, 1]
    model = IsotonicRegression(increasing=True, out_of_bounds="clip")
    model.fit(x, y)
    assert np.all(np.isfinite(model.predict(x)))
def test_isotonic_ymin_ymax():
    # Test from @NelleV's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/6921
    # The functional isotonic_regression must honour y_min/y_max bounds
    # for increasing and decreasing fits alike.
    x = np.array(
        [
            1.263,
            1.318,
            -0.572,
            0.307,
            -0.707,
            -0.176,
            -1.599,
            1.059,
            1.396,
            1.906,
            0.210,
            0.028,
            -0.081,
            0.444,
            0.018,
            -0.377,
            -0.896,
            -0.377,
            -1.327,
            0.180,
        ]
    )
    y = isotonic_regression(x, y_min=0.0, y_max=0.1)
    assert np.all(y >= 0)
    assert np.all(y <= 0.1)
    # Also test decreasing case since the logic there is different
    y = isotonic_regression(x, y_min=0.0, y_max=0.1, increasing=False)
    assert np.all(y >= 0)
    assert np.all(y <= 0.1)
    # Finally, test with only one bound
    y = isotonic_regression(x, y_min=0.0, increasing=False)
    assert np.all(y >= 0)
def test_isotonic_zero_weight_loop():
    # Test from @ogrisel's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/4297
    # Zero-valued sample weights used to send the fitting loop into an
    # infinite cycle; the test passes iff fitting terminates.
    # Get deterministic RNG with seed
    rng = np.random.RandomState(42)
    # Create regression and samples
    regression = IsotonicRegression()
    n_samples = 50
    x = np.linspace(-3, 3, n_samples)
    y = x + rng.uniform(size=n_samples)
    # Get some random weights and zero out
    w = rng.uniform(size=n_samples)
    w[5:8] = 0
    regression.fit(x, y, sample_weight=w)
    # This will hang in failure case.
    regression.fit(x, y, sample_weight=w)
def test_fast_predict():
    # test that the faster prediction change doesn't
    # affect out-of-sample predictions:
    # https://github.com/scikit-learn/scikit-learn/pull/6206
    rng = np.random.RandomState(123)
    n_samples = 10**3
    # X values over the -10,10 range
    X_train = 20.0 * rng.rand(n_samples) - 10
    # Bernoulli labels with probability expit(X), as floats
    y_train = (
        np.less(rng.rand(n_samples), expit(X_train)).astype("int64").astype("float64")
    )
    weights = rng.rand(n_samples)
    # we also want to test that everything still works when some weights are 0
    weights[rng.rand(n_samples) < 0.1] = 0
    slow_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
    fast_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
    # Build interpolation function with ALL input data, not just the
    # non-redundant subset. The following 2 lines are taken from the
    # .fit() method, without removing unnecessary points
    X_train_fit, y_train_fit = slow_model._build_y(
        X_train, y_train, sample_weight=weights, trim_duplicates=False
    )
    slow_model._build_f(X_train_fit, y_train_fit)
    # fit with just the necessary data
    fast_model.fit(X_train, y_train, sample_weight=weights)
    # Out-of-sample predictions from both models must coincide exactly.
    X_test = 20.0 * rng.rand(n_samples) - 10
    y_pred_slow = slow_model.predict(X_test)
    y_pred_fast = fast_model.predict(X_test)
    assert_array_equal(y_pred_slow, y_pred_fast)
def test_isotonic_copy_before_fit():
    """Copying an unfitted estimator must not raise.

    Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/6628
    """
    unfitted = IsotonicRegression()
    copy.copy(unfitted)
def test_isotonic_dtype():
    # Both the functional and estimator APIs must return float32/float64
    # output matching what check_array would coerce the input y to.
    y = [2, 1, 4, 3, 5]
    weights = np.array([0.9, 0.9, 0.9, 0.9, 0.9], dtype=np.float64)
    reg = IsotonicRegression()
    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            # check_array decides the promoted float dtype for this input
            expected_dtype = check_array(
                y_np, dtype=[np.float64, np.float32], ensure_2d=False
            ).dtype
            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert res.dtype == expected_dtype
            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert res.dtype == expected_dtype
@pytest.mark.parametrize("y_dtype", [np.int32, np.int64, np.float32, np.float64])
def test_isotonic_mismatched_dtype(y_dtype):
    """X and y with differing dtypes must be converted consistently.

    Non-regression test for #15004.
    """
    model = IsotonicRegression()
    targets = np.array([2, 1, 4, 3, 5], dtype=y_dtype)
    features = np.arange(len(targets), dtype=np.float32)
    model.fit(features, targets)
    # predictions follow the dtype of X
    assert model.predict(features).dtype == features.dtype
def test_make_unique_dtype():
    """_make_unique must collapse duplicate x values for both float dtypes."""
    values = [2, 2, 2, 3, 5]
    for dtype in (np.float32, np.float64):
        x = np.array(values, dtype=dtype)
        deduped_x, _, _ = _make_unique(x, x.copy(), np.ones_like(x))
        assert_array_equal(deduped_x, [2, 3, 5])
@pytest.mark.parametrize("dtype", [np.float64, np.float32])
def test_make_unique_tolerance(dtype):
    # Check that equality takes account of np.finfo tolerance:
    # values closer than the dtype's eps are merged into one point.
    x = np.array([0, 1e-16, 1, 1 + 1e-14], dtype=dtype)
    y = x.copy()
    w = np.ones_like(x)
    x, y, w = _make_unique(x, y, w)
    if dtype == np.float64:
        # 1e-14 is resolvable in float64, so 1 and 1 + 1e-14 stay distinct
        x_out = np.array([0, 1, 1 + 1e-14])
    else:
        # in float32 both pairs collapse
        x_out = np.array([0, 1])
    assert_array_equal(x, x_out)
def test_isotonic_make_unique_tolerance():
    # Check that averaging of targets for duplicate X is done correctly,
    # taking into account tolerance: 1 and 1 + 1e-16 are one point, whose
    # target becomes the mean of 1 and 2, i.e. 1.5.
    X = np.array([0, 1, 1 + 1e-16, 2], dtype=np.float64)
    y = np.array([0, 1, 2, 3], dtype=np.float64)
    ireg = IsotonicRegression().fit(X, y)
    y_pred = ireg.predict([0, 0.5, 1, 1.5, 2])
    assert_array_equal(y_pred, np.array([0, 0.75, 1.5, 2.25, 3]))
    assert_array_equal(ireg.X_thresholds_, np.array([0.0, 1.0, 2.0]))
    assert_array_equal(ireg.y_thresholds_, np.array([0.0, 1.5, 3.0]))
def test_isotonic_non_regression_inf_slope():
    # Non-regression test to ensure that inf values are not returned
    # see: https://github.com/scikit-learn/scikit-learn/issues/10903
    # The tiny denormal gaps between X values used to produce infinite
    # interpolation slopes.
    X = np.array([0.0, 4.1e-320, 4.4e-314, 1.0])
    y = np.array([0.42, 0.42, 0.44, 0.44])
    ireg = IsotonicRegression().fit(X, y)
    y_pred = ireg.predict(np.array([0, 2.1e-319, 5.4e-316, 1e-10]))
    assert np.all(np.isfinite(y_pred))
@pytest.mark.parametrize("increasing", [True, False])
def test_isotonic_thresholds(increasing):
    # X_thresholds_ / y_thresholds_ must describe a valid monotone step
    # function built from a strict subset of the training data.
    rng = np.random.RandomState(42)
    n_samples = 30
    X = rng.normal(size=n_samples)
    y = rng.normal(size=n_samples)
    ireg = IsotonicRegression(increasing=increasing).fit(X, y)
    X_thresholds, y_thresholds = ireg.X_thresholds_, ireg.y_thresholds_
    assert X_thresholds.shape == y_thresholds.shape
    # Input thresholds are a strict subset of the training set (unless
    # the data is already strictly monotonic which is not the case with
    # this random data)
    assert X_thresholds.shape[0] < X.shape[0]
    assert np.isin(X_thresholds, X).all()
    # Output thresholds lie in the range of the training set:
    assert y_thresholds.max() <= y.max()
    assert y_thresholds.min() >= y.min()
    # X thresholds are strictly sorted; y thresholds are monotone in the
    # requested direction.
    assert all(np.diff(X_thresholds) > 0)
    if increasing:
        assert all(np.diff(y_thresholds) >= 0)
    else:
        assert all(np.diff(y_thresholds) <= 0)
def test_input_shape_validation():
    # Test from #15012
    # Check that IsotonicRegression can handle 2darray with only 1 feature:
    # fitting on (n,) and (n, 1) inputs must be fully equivalent.
    X = np.arange(10)
    X_2d = X.reshape(-1, 1)
    y = np.arange(10)
    iso_reg = IsotonicRegression().fit(X, y)
    iso_reg_2d = IsotonicRegression().fit(X_2d, y)
    assert iso_reg.X_max_ == iso_reg_2d.X_max_
    assert iso_reg.X_min_ == iso_reg_2d.X_min_
    assert iso_reg.y_max == iso_reg_2d.y_max
    assert iso_reg.y_min == iso_reg_2d.y_min
    assert_array_equal(iso_reg.X_thresholds_, iso_reg_2d.X_thresholds_)
    assert_array_equal(iso_reg.y_thresholds_, iso_reg_2d.y_thresholds_)
    # predictions must match too
    y_pred1 = iso_reg.predict(X)
    y_pred2 = iso_reg_2d.predict(X_2d)
    assert_allclose(y_pred1, y_pred2)
def test_isotonic_2darray_more_than_1_feature():
    """2d input with more than one feature must be rejected everywhere."""
    grid = np.arange(10)
    two_cols = np.c_[grid, grid]
    targets = np.arange(10)
    msg = "should be a 1d array or 2d array with 1 feature"
    # rejected at fit time
    with pytest.raises(ValueError, match=msg):
        IsotonicRegression().fit(two_cols, targets)
    # and at predict / transform time on a fitted model
    model = IsotonicRegression().fit(grid, targets)
    for method in (model.predict, model.transform):
        with pytest.raises(ValueError, match=msg):
            method(two_cols)
def test_isotonic_regression_sample_weight_not_overwritten():
    """Check that calling fitting function of isotonic regression will not
    overwrite `sample_weight`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20508
    """
    X, y = make_regression(n_samples=10, n_features=1, random_state=41)
    sample_weight_original = np.ones_like(y)
    sample_weight_original[0] = 10
    sample_weight_fit = sample_weight_original.copy()
    # functional API must leave the weights array untouched
    isotonic_regression(y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)
    # estimator API must leave the weights array untouched as well
    IsotonicRegression().fit(X, y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)
@pytest.mark.parametrize("shape", ["1d", "2d"])
def test_get_feature_names_out(shape):
    """Check `get_feature_names_out` for `IsotonicRegression`."""
    features = np.arange(10)
    if shape == "2d":
        features = features.reshape(-1, 1)
    model = IsotonicRegression().fit(features, np.arange(10))
    names = model.get_feature_names_out()
    # a single object-dtype name derived from the lowercased class name
    assert isinstance(names, np.ndarray)
    assert names.dtype == object
    assert_array_equal(["isotonicregression0"], names)
def test_isotonic_regression_output_predict():
    """Check that `predict` does return the expected output type.
    We need to check that `transform` will output a DataFrame and a NumPy array
    when we set `transform_output` to `pandas`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25499
    """
    pd = pytest.importorskip("pandas")
    X, y = make_regression(n_samples=10, n_features=1, random_state=42)
    regressor = IsotonicRegression()
    with sklearn.config_context(transform_output="pandas"):
        regressor.fit(X, y)
        X_trans = regressor.transform(X)
        y_pred = regressor.predict(X)
    # transform honours the pandas output config; predict never does
    assert isinstance(X_trans, pd.DataFrame)
    assert isinstance(y_pred, np.ndarray)
@@ -0,0 +1,489 @@
import re
import numpy as np
import pytest
from sklearn.datasets import make_classification
from sklearn.kernel_approximation import (
AdditiveChi2Sampler,
Nystroem,
PolynomialCountSketch,
RBFSampler,
SkewedChi2Sampler,
)
from sklearn.metrics.pairwise import (
chi2_kernel,
kernel_metrics,
polynomial_kernel,
rbf_kernel,
)
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# generate data
# Module-level fixtures shared by the tests below. Rows are normalised to
# sum to one so that chi^2-style kernels (which expect histogram-like,
# non-negative input) are well defined.
rng = np.random.RandomState(0)
X = rng.random_sample(size=(300, 50))
Y = rng.random_sample(size=(300, 50))
X /= X.sum(axis=1)[:, np.newaxis]
Y /= Y.sum(axis=1)[:, np.newaxis]
@pytest.mark.parametrize("gamma", [0.1, 1, 2.5])
@pytest.mark.parametrize("degree, n_components", [(1, 500), (2, 500), (3, 5000)])
@pytest.mark.parametrize("coef0", [0, 2.5])
def test_polynomial_count_sketch(gamma, degree, coef0, n_components):
    # test that PolynomialCountSketch approximates polynomial
    # kernel on random data
    # compute exact kernel
    kernel = polynomial_kernel(X, Y, gamma=gamma, degree=degree, coef0=coef0)
    # approximate kernel mapping
    ps_transform = PolynomialCountSketch(
        n_components=n_components,
        gamma=gamma,
        coef0=coef0,
        degree=degree,
        random_state=42,
    )
    X_trans = ps_transform.fit_transform(X)
    Y_trans = ps_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    # the sketch is an unbiased estimator: mean error small, max bounded
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.05  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("gamma", [0.1, 1.0])
@pytest.mark.parametrize("degree", [1, 2, 3])
@pytest.mark.parametrize("coef0", [0, 2.5])
def test_polynomial_count_sketch_dense_sparse(gamma, degree, coef0, csr_container):
    """Check that PolynomialCountSketch results are the same for dense and sparse
    input.
    """
    # identical random_state so both samplers draw the same sketch
    ps_dense = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_dense = ps_dense.fit_transform(X)
    Yt_dense = ps_dense.transform(Y)
    ps_sparse = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_sparse = ps_sparse.fit_transform(csr_container(X))
    Yt_sparse = ps_sparse.transform(csr_container(Y))
    assert_allclose(Xt_dense, Xt_sparse)
    assert_allclose(Yt_dense, Yt_sparse)
def _linear_kernel(X, Y):
return np.dot(X, Y.T)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_additive_chi2_sampler(csr_container):
    # test that AdditiveChi2Sampler approximates kernel on random data
    # compute exact kernel
    # abbreviations for easier formula
    X_ = X[:, np.newaxis, :]
    Y_ = Y[np.newaxis, :, :]
    # exact additive chi^2 kernel: sum_i 2 x_i y_i / (x_i + y_i)
    large_kernel = 2 * X_ * Y_ / (X_ + Y_)
    # reduce to n_samples_x x n_samples_y by summing over features
    kernel = large_kernel.sum(axis=2)
    # approximate kernel mapping
    transform = AdditiveChi2Sampler(sample_steps=3)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)
    # sparse input must give exactly the dense result
    X_sp_trans = transform.fit_transform(csr_container(X))
    Y_sp_trans = transform.transform(csr_container(Y))
    assert_array_equal(X_trans, X_sp_trans.toarray())
    assert_array_equal(Y_trans, Y_sp_trans.toarray())
    # test error is raised on negative input
    Y_neg = Y.copy()
    Y_neg[0, 0] = -1
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        transform.fit(Y_neg)
@pytest.mark.parametrize("method", ["fit", "fit_transform", "transform"])
@pytest.mark.parametrize("sample_steps", range(1, 4))
def test_additive_chi2_sampler_sample_steps(method, sample_steps):
    """Valid sample_steps must work, and an explicit sample_interval must
    survive fitting unchanged."""
    # any of the supported sample_steps works without sample_interval
    getattr(AdditiveChi2Sampler(sample_steps=sample_steps), method)(X)
    # an explicitly provided interval is not overwritten by fitting
    interval = 0.5
    sampler = AdditiveChi2Sampler(
        sample_steps=sample_steps,
        sample_interval=interval,
    )
    getattr(sampler, method)(X)
    assert sampler.sample_interval == interval
@pytest.mark.parametrize("method", ["fit", "fit_transform", "transform"])
def test_additive_chi2_sampler_wrong_sample_steps(method):
    """sample_steps outside {1, 2, 3} without sample_interval must raise."""
    sampler = AdditiveChi2Sampler(sample_steps=4)
    expected = re.escape(
        "If sample_steps is not in [1, 2, 3], you need to provide sample_interval"
    )
    with pytest.raises(ValueError, match=expected):
        getattr(sampler, method)(X)
def test_skewed_chi2_sampler():
    """SkewedChi2Sampler must approximate the skewed chi^2 kernel.

    Works on local copies of the module-level ``X`` / ``Y`` fixtures: the
    original code mutated the shared global ``Y`` in place
    (``Y[0, 0] = -c / 2.0``), leaking a negative entry into every test that
    reused ``Y`` afterwards — an ordering-dependent hazard for samplers
    that reject negative input.
    """
    c = 0.03
    X_local = X.copy()
    Y_local = Y.copy()
    # set on negative component but greater than c to ensure that the kernel
    # approximation is valid on the group (-c; +\infty) endowed with the skewed
    # multiplication.
    Y_local[0, 0] = -c / 2.0
    # abbreviations for easier formula
    X_c = (X_local + c)[:, np.newaxis, :]
    Y_c = (Y_local + c)[np.newaxis, :, :]
    # we do it in log-space in the hope that it's more stable
    # this array is n_samples_x x n_samples_y big x n_features
    log_kernel = (
        (np.log(X_c) / 2.0) + (np.log(Y_c) / 2.0) + np.log(2.0) - np.log(X_c + Y_c)
    )
    # reduce to n_samples_x x n_samples_y by summing over features in log-space
    kernel = np.exp(log_kernel.sum(axis=2))
    # approximate kernel mapping
    transform = SkewedChi2Sampler(skewedness=c, n_components=1000, random_state=42)
    X_trans = transform.fit_transform(X_local)
    Y_trans = transform.transform(Y_local)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)
    assert np.isfinite(kernel).all(), "NaNs found in the Gram matrix"
    assert np.isfinite(kernel_approx).all(), "NaNs found in the approximate Gram matrix"
    # test error is raised on when inputs contains values smaller than -c
    Y_neg = Y_local.copy()
    Y_neg[0, 0] = -c * 2.0
    msg = "X may not contain entries smaller than -skewedness"
    with pytest.raises(ValueError, match=msg):
        transform.transform(Y_neg)
def test_additive_chi2_sampler_exceptions():
    """Ensures correct error message"""
    transformer = AdditiveChi2Sampler()
    X_neg = X.copy()
    X_neg[0, 0] = -1
    with pytest.raises(ValueError, match="X in AdditiveChi2Sampler.fit"):
        transformer.fit(X_neg)
    with pytest.raises(ValueError, match="X in AdditiveChi2Sampler.transform"):
        # both calls intentionally inside the context: fit on valid data
        # succeeds, then transform on negative data raises with the
        # transform-specific message being tested
        transformer.fit(X)
        transformer.transform(X_neg)
def test_rbf_sampler():
    """RBFSampler's random features must approximate the RBF kernel."""
    gamma = 10.0
    exact = rbf_kernel(X, Y, gamma=gamma)
    # approximate kernel mapping via random Fourier features
    sampler = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    embedded_X = sampler.fit_transform(X)
    embedded_Y = sampler.transform(Y)
    approx = np.dot(embedded_X, embedded_Y.T)
    error = exact - approx
    assert np.abs(np.mean(error)) <= 0.01  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
def test_rbf_sampler_fitted_attributes_dtype(global_dtype):
    """Check that the fitted attributes are stored accordingly to the
    data type of X."""
    rbf = RBFSampler()
    X = np.array([[1, 2], [3, 4], [5, 6]], dtype=global_dtype)
    rbf.fit(X)
    # both fitted arrays follow the dtype of the training data
    assert rbf.random_offset_.dtype == global_dtype
    assert rbf.random_weights_.dtype == global_dtype
def test_rbf_sampler_dtype_equivalence():
    """Check the equivalence of the results with 32 and 64 bits input."""
    data = [[1, 2], [3, 4], [5, 6]]
    fitted = {}
    # fit one sampler per dtype with the same random_state
    for dtype in (np.float32, np.float64):
        sampler = RBFSampler(random_state=42)
        sampler.fit(np.array(data, dtype=dtype))
        fitted[dtype] = sampler
    assert_allclose(
        fitted[np.float32].random_offset_, fitted[np.float64].random_offset_
    )
    assert_allclose(
        fitted[np.float32].random_weights_, fitted[np.float64].random_weights_
    )
def test_rbf_sampler_gamma_scale():
    """Check the inner value computed when `gamma='scale'`."""
    features, labels = [[0.0], [1.0]], [0, 1]
    sampler = RBFSampler(gamma="scale")
    sampler.fit(features, labels)
    # 1 feature with variance 0.25 -> gamma = 1 / (1 * 0.25) = 4
    assert sampler._gamma == pytest.approx(4)
def test_skewed_chi2_sampler_fitted_attributes_dtype(global_dtype):
    """Check that the fitted attributes are stored accordingly to the
    data type of X."""
    skewed_chi2_sampler = SkewedChi2Sampler()
    X = np.array([[1, 2], [3, 4], [5, 6]], dtype=global_dtype)
    skewed_chi2_sampler.fit(X)
    # both fitted arrays follow the dtype of the training data
    assert skewed_chi2_sampler.random_offset_.dtype == global_dtype
    assert skewed_chi2_sampler.random_weights_.dtype == global_dtype
def test_skewed_chi2_sampler_dtype_equivalence():
    """Check the equivalence of the results with 32 and 64 bits input."""
    # same random_state: fitted attributes must agree across dtypes
    skewed_chi2_sampler_32 = SkewedChi2Sampler(random_state=42)
    X_32 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
    skewed_chi2_sampler_32.fit(X_32)
    skewed_chi2_sampler_64 = SkewedChi2Sampler(random_state=42)
    X_64 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
    skewed_chi2_sampler_64.fit(X_64)
    assert_allclose(
        skewed_chi2_sampler_32.random_offset_, skewed_chi2_sampler_64.random_offset_
    )
    assert_allclose(
        skewed_chi2_sampler_32.random_weights_, skewed_chi2_sampler_64.random_weights_
    )
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_input_validation(csr_container):
    """Kernel-approximation transformers must accept plain Python lists.

    Regression test; no assertions — the old versions simply crashed.
    """
    data = [[1, 2], [3, 4], [5, 6]]
    for transformer in (AdditiveChi2Sampler(), SkewedChi2Sampler(), RBFSampler()):
        transformer.fit(data).transform(data)
    # sparse input must also be accepted by RBFSampler
    sparse_data = csr_container(data)
    RBFSampler().fit(sparse_data).transform(sparse_data)
def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))
    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)
    # fewer components just changes the embedding dimensionality
    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)
    # test callable kernel
    trans = Nystroem(n_components=2, kernel=_linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)
    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert X_transformed.shape == (X.shape[0], 2)
def test_nystroem_default_parameters():
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))
    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    nystroem = Nystroem(n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = rbf_kernel(X, gamma=None)
    # n_components == n_samples makes the approximation exact
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)
    # chi2 kernel should behave as gamma=1 by default
    nystroem = Nystroem(kernel="chi2", n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = chi2_kernel(X, gamma=1)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)
def test_nystroem_singular_kernel():
    # test that nystroem works with singular kernel matrix
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    X = np.vstack([X] * 2)  # duplicate samples
    gamma = 100
    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
    X_transformed = N.transform(X)
    K = rbf_kernel(X, gamma=gamma)
    assert_array_almost_equal(K, np.dot(X_transformed, X_transformed.T))
    # NOTE(review): this asserts on the *module-level* Y fixture, not any
    # local value — presumably vestigial; confirm whether X_transformed was
    # intended instead.
    assert np.all(np.isfinite(Y))
def test_nystroem_poly_kernel_params():
    """Nystroem must forward degree/coef0 to the polynomial kernel.

    Non-regression: Nystroem should pass other parameters beside gamma.
    """
    rnd = np.random.RandomState(37)
    data = rnd.uniform(size=(10, 4))
    exact = polynomial_kernel(data, degree=3.1, coef0=0.1)
    embedded = Nystroem(
        kernel="polynomial", n_components=data.shape[0], degree=3.1, coef0=0.1
    ).fit_transform(data)
    # full-rank approximation reproduces the exact kernel
    assert_array_almost_equal(np.dot(embedded, embedded.T), exact)
def test_nystroem_callable():
    # Test Nystroem on a callable.
    rnd = np.random.RandomState(42)
    n_samples = 10
    X = rnd.uniform(size=(n_samples, 4))

    def logging_histogram_kernel(x, y, log):
        """Histogram kernel that writes to a log."""
        log.append(1)
        return np.minimum(x, y).sum()

    kernel_log = []
    X = list(X)  # test input validation
    Nystroem(
        kernel=logging_histogram_kernel,
        n_components=(n_samples - 1),
        kernel_params={"log": kernel_log},
    ).fit(X)
    # one kernel evaluation per unordered pair of basis samples
    assert len(kernel_log) == n_samples * (n_samples - 1) / 2
    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({"gamma": 1}, {"coef0": 1}, {"degree": 2})
    for param in params:
        ny = Nystroem(kernel=_linear_kernel, n_components=(n_samples - 1), **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(X)
def test_nystroem_precomputed_kernel():
    # Non-regression: test Nystroem on precomputed kernel.
    # PR - 14706
    rnd = np.random.RandomState(12)
    X = rnd.uniform(size=(10, 4))
    K = polynomial_kernel(X, degree=2, coef0=0.1)
    nystroem = Nystroem(kernel="precomputed", n_components=X.shape[0])
    X_transformed = nystroem.fit_transform(K)
    # full-rank approximation reproduces the precomputed kernel
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)
    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({"gamma": 1}, {"coef0": 1}, {"degree": 2})
    for param in params:
        ny = Nystroem(kernel="precomputed", n_components=X.shape[0], **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(K)
def test_nystroem_component_indices():
    """Check that `component_indices_` corresponds to the subset of
    training points used to construct the feature map.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20474
    """
    data, _ = make_classification(n_samples=100, n_features=20)
    mapper = Nystroem(
        n_components=10,
        random_state=0,
    )
    mapper.fit(data)
    # exactly one index per component
    assert mapper.component_indices_.shape == (10,)
@pytest.mark.parametrize(
    "Estimator", [PolynomialCountSketch, RBFSampler, SkewedChi2Sampler, Nystroem]
)
def test_get_feature_names_out(Estimator):
    """Check get_feature_names_out"""
    est = Estimator().fit(X)
    X_trans = est.transform(X)
    names_out = est.get_feature_names_out()
    # names are "<lowercased class name><index>", one per output column
    class_name = Estimator.__name__.lower()
    expected_names = [f"{class_name}{i}" for i in range(X_trans.shape[1])]
    assert_array_equal(names_out, expected_names)
def test_additivechi2sampler_get_feature_names_out():
    """Check get_feature_names_out for AdditiveChi2Sampler."""
    rng = np.random.RandomState(0)
    X = rng.random_sample(size=(300, 3))
    chi2_sampler = AdditiveChi2Sampler(sample_steps=3).fit(X)
    input_names = ["f0", "f1", "f2"]
    # sample_steps=3 expands each input feature into sqrt, cos1/sin1 and
    # cos2/sin2 components, in this fixed order
    suffixes = [
        "f0_sqrt",
        "f1_sqrt",
        "f2_sqrt",
        "f0_cos1",
        "f1_cos1",
        "f2_cos1",
        "f0_sin1",
        "f1_sin1",
        "f2_sin1",
        "f0_cos2",
        "f1_cos2",
        "f2_cos2",
        "f0_sin2",
        "f1_sin2",
        "f2_sin2",
    ]
    names_out = chi2_sampler.get_feature_names_out(input_features=input_names)
    expected_names = [f"additivechi2sampler_{suffix}" for suffix in suffixes]
    assert_array_equal(names_out, expected_names)
@@ -0,0 +1,80 @@
import numpy as np
import pytest
from sklearn.datasets import make_regression
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils._testing import assert_array_almost_equal, ignore_warnings
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
# Shared regression fixtures: y is a single target; Y stacks it twice to
# exercise the multi-output code path.
X, y = make_regression(n_features=10, random_state=0)
Y = np.array([y, y]).T
def test_kernel_ridge():
    """Linear KernelRidge must match Ridge without intercept."""
    ridge_pred = Ridge(alpha=1, fit_intercept=False).fit(X, y).predict(X)
    kr_pred = KernelRidge(kernel="linear", alpha=1).fit(X, y).predict(X)
    assert_array_almost_equal(ridge_pred, kr_pred)
@pytest.mark.parametrize("sparse_container", [*CSR_CONTAINERS, *CSC_CONTAINERS])
def test_kernel_ridge_sparse(sparse_container):
    # Sparse input must reproduce Ridge's (cholesky) predictions.
    X_sparse = sparse_container(X)
    pred = (
        Ridge(alpha=1, fit_intercept=False, solver="cholesky")
        .fit(X_sparse, y)
        .predict(X_sparse)
    )
    pred2 = KernelRidge(kernel="linear", alpha=1).fit(X_sparse, y).predict(X_sparse)
    assert_array_almost_equal(pred, pred2)
def test_kernel_ridge_singular_kernel():
    """alpha=0 makes the dual system singular; the lstsq fallback must
    still reproduce Ridge's solution.

    alpha=0 causes a LinAlgError in computing the dual coefficients,
    which causes a fallback to a lstsq solver. This is tested here.
    """
    expected = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
    model = KernelRidge(kernel="linear", alpha=0)
    ignore_warnings(model.fit)(X, y)
    assert_array_almost_equal(expected, model.predict(X))
def test_kernel_ridge_precomputed():
    """kernel="precomputed" must match computing the kernel internally."""
    for kernel in ["linear", "rbf", "poly", "cosine"]:
        gram = pairwise_kernels(X, X, metric=kernel)
        direct = KernelRidge(kernel=kernel).fit(X, y).predict(X)
        precomputed = KernelRidge(kernel="precomputed").fit(gram, y).predict(gram)
        assert_array_almost_equal(direct, precomputed)
def test_kernel_ridge_precomputed_kernel_unchanged():
    """Fitting on a precomputed kernel must not modify it in place."""
    gram = np.dot(X, X.T)
    snapshot = gram.copy()
    KernelRidge(kernel="precomputed").fit(gram, y)
    assert_array_almost_equal(gram, snapshot)
def test_kernel_ridge_sample_weights():
    # Weighted fits must agree across Ridge, linear KernelRidge and
    # precomputed-kernel KernelRidge.
    K = np.dot(X, X.T)  # precomputed kernel
    sw = np.random.RandomState(0).rand(X.shape[0])
    pred = Ridge(alpha=1, fit_intercept=False).fit(X, y, sample_weight=sw).predict(X)
    pred2 = KernelRidge(kernel="linear", alpha=1).fit(X, y, sample_weight=sw).predict(X)
    pred3 = (
        KernelRidge(kernel="precomputed", alpha=1)
        .fit(K, y, sample_weight=sw)
        .predict(K)
    )
    assert_array_almost_equal(pred, pred2)
    assert_array_almost_equal(pred, pred3)
def test_kernel_ridge_multi_output():
    """Multi-output fitting must match Ridge and per-column fitting."""
    ridge_pred = Ridge(alpha=1, fit_intercept=False).fit(X, Y).predict(X)
    kr_pred = KernelRidge(kernel="linear", alpha=1).fit(X, Y).predict(X)
    assert_array_almost_equal(ridge_pred, kr_pred)
    # fitting the duplicated-column target equals fitting each column alone
    single = KernelRidge(kernel="linear", alpha=1).fit(X, y).predict(X)
    stacked = np.array([single, single]).T
    assert_array_almost_equal(kr_pred, stacked)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,307 @@
"""Common tests for metaestimators"""
import functools
from inspect import signature
import numpy as np
import pytest
from sklearn.base import BaseEstimator, is_regressor
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import RFE, RFECV
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.utils import all_estimators
from sklearn.utils._testing import set_random_state
from sklearn.utils.estimator_checks import (
_enforce_estimator_tags_X,
_enforce_estimator_tags_y,
)
from sklearn.utils.validation import check_is_fitted
class DelegatorData:
    """Description of one delegating meta-estimator under test.

    Bundles the display name, a callable building the meta-estimator from a
    sub-estimator, the delegated methods to skip, and the (X, y) fit data.
    """

    def __init__(
        self,
        name,
        construct,
        skip_methods=(),
        fit_args=make_classification(random_state=0),
    ):
        # Keep plain attribute assignment; no validation is needed for
        # this purely test-internal record.
        self.name = name
        self.construct = construct
        self.skip_methods = skip_methods
        self.fit_args = fit_args
# Meta-estimators expected to expose a delegated method iff their
# sub-estimator has it; `skip_methods` lists methods each wrapper does not
# delegate and which are therefore excluded from the check.
DELEGATING_METAESTIMATORS = [
    DelegatorData("Pipeline", lambda est: Pipeline([("est", est)])),
    DelegatorData(
        "GridSearchCV",
        lambda est: GridSearchCV(est, param_grid={"param": [5]}, cv=2),
        skip_methods=["score"],
    ),
    DelegatorData(
        "RandomizedSearchCV",
        lambda est: RandomizedSearchCV(
            est, param_distributions={"param": [5]}, cv=2, n_iter=1
        ),
        skip_methods=["score"],
    ),
    DelegatorData("RFE", RFE, skip_methods=["transform", "inverse_transform"]),
    DelegatorData("RFECV", RFECV, skip_methods=["transform", "inverse_transform"]),
    DelegatorData(
        "BaggingClassifier",
        BaggingClassifier,
        skip_methods=[
            "transform",
            "inverse_transform",
            "score",
            "predict_proba",
            "predict_log_proba",
            "predict",
        ],
    ),
    DelegatorData(
        "SelfTrainingClassifier",
        lambda est: SelfTrainingClassifier(est),
        skip_methods=["transform", "inverse_transform", "predict_proba"],
    ),
]
def test_metaestimator_delegation():
    # Ensures specified metaestimators have methods iff subestimator does
    # `hides` wraps a method in a property that raises AttributeError when the
    # instance's `hidden_method` names it, so `hasattr` returns False and the
    # meta-estimator should consequently not expose the method either.
    def hides(method):
        @property
        def wrapper(obj):
            if obj.hidden_method == method.__name__:
                raise AttributeError("%r is hidden" % obj.hidden_method)
            return functools.partial(method, obj)
        return wrapper
    # Minimal estimator whose methods can be selectively hidden via the
    # `hidden_method` constructor argument.
    class SubEstimator(BaseEstimator):
        def __init__(self, param=1, hidden_method=None):
            self.param = param
            self.hidden_method = hidden_method
        def fit(self, X, y=None, *args, **kwargs):
            self.coef_ = np.arange(X.shape[1])
            self.classes_ = []
            return True
        def _check_fit(self):
            check_is_fitted(self)
        @hides
        def inverse_transform(self, X, *args, **kwargs):
            self._check_fit()
            return X
        @hides
        def transform(self, X, *args, **kwargs):
            self._check_fit()
            return X
        @hides
        def predict(self, X, *args, **kwargs):
            self._check_fit()
            return np.ones(X.shape[0])
        @hides
        def predict_proba(self, X, *args, **kwargs):
            self._check_fit()
            return np.ones(X.shape[0])
        @hides
        def predict_log_proba(self, X, *args, **kwargs):
            self._check_fit()
            return np.ones(X.shape[0])
        @hides
        def decision_function(self, X, *args, **kwargs):
            self._check_fit()
            return np.ones(X.shape[0])
        @hides
        def score(self, X, y, *args, **kwargs):
            self._check_fit()
            return 1.0
    # All public, non-fit methods declared directly on SubEstimator.
    methods = [
        k
        for k in SubEstimator.__dict__.keys()
        if not k.startswith("_") and not k.startswith("fit")
    ]
    methods.sort()
    for delegator_data in DELEGATING_METAESTIMATORS:
        delegate = SubEstimator()
        delegator = delegator_data.construct(delegate)
        for method in methods:
            if method in delegator_data.skip_methods:
                continue
            assert hasattr(delegate, method)
            assert hasattr(
                delegator, method
            ), "%s does not have method %r when its delegate does" % (
                delegator_data.name,
                method,
            )
            # delegation before fit raises a NotFittedError
            if method == "score":
                with pytest.raises(NotFittedError):
                    getattr(delegator, method)(
                        delegator_data.fit_args[0], delegator_data.fit_args[1]
                    )
            else:
                with pytest.raises(NotFittedError):
                    getattr(delegator, method)(delegator_data.fit_args[0])
        delegator.fit(*delegator_data.fit_args)
        for method in methods:
            if method in delegator_data.skip_methods:
                continue
            # smoke test delegation
            if method == "score":
                getattr(delegator, method)(
                    delegator_data.fit_args[0], delegator_data.fit_args[1]
                )
            else:
                getattr(delegator, method)(delegator_data.fit_args[0])
        # Conversely: when the delegate hides a method, the meta-estimator
        # must not expose it either.
        for method in methods:
            if method in delegator_data.skip_methods:
                continue
            delegate = SubEstimator(hidden_method=method)
            delegator = delegator_data.construct(delegate)
            assert not hasattr(delegate, method)
            assert not hasattr(
                delegator, method
            ), "%s has method %r when its delegate does not" % (
                delegator_data.name,
                method,
            )
def _generate_meta_estimator_instances_with_pipeline():
    """Generate instances of meta-estimators fed with a pipeline.

    Are considered meta-estimators all estimators whose signature accepts one
    of "estimator", "base_estimator", "regressor", "transformer_list"
    (FeatureUnion) or "estimators" (stacking/voting). Each yielded instance
    wraps a TfidfVectorizer pipeline so that fitting on raw text is valid.
    """
    for _, Estimator in sorted(all_estimators()):
        sig = set(signature(Estimator).parameters)
        if "estimator" in sig or "base_estimator" in sig or "regressor" in sig:
            if is_regressor(Estimator):
                estimator = make_pipeline(TfidfVectorizer(), Ridge())
                param_grid = {"ridge__alpha": [0.1, 1.0]}
            else:
                estimator = make_pipeline(TfidfVectorizer(), LogisticRegression())
                param_grid = {"logisticregression__C": [0.1, 1.0]}
            if "param_grid" in sig or "param_distributions" in sig:
                # SearchCV estimators
                extra_params = {"n_iter": 2} if "n_iter" in sig else {}
                yield Estimator(estimator, param_grid, **extra_params)
            else:
                yield Estimator(estimator)
        elif "transformer_list" in sig:
            # FeatureUnion
            transformer_list = [
                ("trans1", make_pipeline(TfidfVectorizer(), MaxAbsScaler())),
                (
                    "trans2",
                    make_pipeline(TfidfVectorizer(), StandardScaler(with_mean=False)),
                ),
            ]
            yield Estimator(transformer_list)
        elif "estimators" in sig:
            # stacking, voting
            if is_regressor(Estimator):
                estimator = [
                    ("est1", make_pipeline(TfidfVectorizer(), Ridge(alpha=0.1))),
                    ("est2", make_pipeline(TfidfVectorizer(), Ridge(alpha=1))),
                ]
            else:
                estimator = [
                    (
                        "est1",
                        make_pipeline(TfidfVectorizer(), LogisticRegression(C=0.1)),
                    ),
                    ("est2", make_pipeline(TfidfVectorizer(), LogisticRegression(C=1))),
                ]
            yield Estimator(estimator)
        else:
            # Not a meta-estimator by the criteria above.
            continue
# TODO: remove data validation for the following estimators
# They should be able to work on any data and delegate data validation to
# their inner estimator(s).
DATA_VALIDATION_META_ESTIMATORS_TO_IGNORE = [
    "AdaBoostClassifier",
    "AdaBoostRegressor",
    "BaggingClassifier",
    "BaggingRegressor",
    "ClassifierChain",  # data validation is necessary
    "IterativeImputer",
    "OneVsOneClassifier",  # input validation can't be avoided
    "RANSACRegressor",
    "RFE",
    "RFECV",
    "RegressorChain",  # data validation is necessary
    "SelfTrainingClassifier",
    "SequentialFeatureSelector",  # not applicable (2D data mandatory)
]
# Instances whose data validation is expected to be fully delegated to the
# wrapped pipeline (everything generated above minus the ignore list).
DATA_VALIDATION_META_ESTIMATORS = [
    est
    for est in _generate_meta_estimator_instances_with_pipeline()
    if est.__class__.__name__ not in DATA_VALIDATION_META_ESTIMATORS_TO_IGNORE
]
def _get_meta_estimator_id(estimator):
return estimator.__class__.__name__
@pytest.mark.parametrize(
    "estimator", DATA_VALIDATION_META_ESTIMATORS, ids=_get_meta_estimator_id
)
def test_meta_estimators_delegate_data_validation(estimator):
    # Check that meta-estimators delegate data validation to the inner
    # estimator(s).
    rng = np.random.RandomState(0)
    set_random_state(estimator)
    n_samples = 30
    # Raw-text X would fail any tabular validation; it is only valid input
    # for the TfidfVectorizer at the head of the inner pipeline.
    X = rng.choice(np.array(["aa", "bb", "cc"], dtype=object), size=n_samples)
    if is_regressor(estimator):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(3, size=n_samples)
    # We convert to lists to make sure it works on array-like
    X = _enforce_estimator_tags_X(estimator, X).tolist()
    y = _enforce_estimator_tags_y(estimator, y).tolist()
    # Calling fit should not raise any data validation exception since X is a
    # valid input datastructure for the first step of the pipeline passed as
    # base estimator to the meta estimator.
    estimator.fit(X, y)
    # n_features_in_ should not be defined since data is not tabular data.
    assert not hasattr(estimator, "n_features_in_")
@@ -0,0 +1,826 @@
import copy
import re
import numpy as np
import pytest
from sklearn import config_context
from sklearn.base import is_classifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.compose import TransformedTargetRegressor
from sklearn.covariance import GraphicalLassoCV
from sklearn.ensemble import (
AdaBoostClassifier,
AdaBoostRegressor,
BaggingClassifier,
BaggingRegressor,
StackingClassifier,
StackingRegressor,
)
from sklearn.exceptions import UnsetMetadataPassedError
from sklearn.experimental import (
enable_halving_search_cv, # noqa
enable_iterative_imputer, # noqa
)
from sklearn.feature_selection import (
RFE,
RFECV,
SelectFromModel,
SequentialFeatureSelector,
)
from sklearn.impute import IterativeImputer
from sklearn.linear_model import (
ElasticNetCV,
LarsCV,
LassoCV,
LassoLarsCV,
LogisticRegressionCV,
MultiTaskElasticNetCV,
MultiTaskLassoCV,
OrthogonalMatchingPursuitCV,
RANSACRegressor,
RidgeClassifierCV,
RidgeCV,
)
from sklearn.model_selection import (
FixedThresholdClassifier,
GridSearchCV,
HalvingGridSearchCV,
HalvingRandomSearchCV,
RandomizedSearchCV,
TunedThresholdClassifierCV,
)
from sklearn.multiclass import (
OneVsOneClassifier,
OneVsRestClassifier,
OutputCodeClassifier,
)
from sklearn.multioutput import (
ClassifierChain,
MultiOutputClassifier,
MultiOutputRegressor,
RegressorChain,
)
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.tests.metadata_routing_common import (
ConsumingClassifier,
ConsumingRegressor,
ConsumingScorer,
ConsumingSplitter,
NonConsumingClassifier,
NonConsumingRegressor,
_Registry,
assert_request_is_empty,
check_recorded_metadata,
)
from sklearn.utils.metadata_routing import MetadataRouter
# Shared fixtures for all routing tests: deterministic data, targets, and
# the metadata arrays (sample_weight / metadata / groups) routed around.
rng = np.random.RandomState(42)
N, M = 100, 4
X = rng.rand(N, M)
y = rng.randint(0, 3, size=N)
y_binary = (y >= 1).astype(int)
classes = np.unique(y)
# Three independent multiclass targets for multi-output estimators.
y_multi = rng.randint(0, 3, size=(N, 3))
classes_multi = [np.unique(y_multi[:, i]) for i in range(y_multi.shape[1])]
metadata = rng.randint(0, 10, size=N)
sample_weight = rng.rand(N)
groups = np.array([0, 1] * (len(y) // 2))
@pytest.fixture(autouse=True)
def enable_slep006():
    """Enable SLEP006 for all tests.

    Autouse fixture: every test in this module runs with
    ``enable_metadata_routing=True``; the config is restored on teardown
    when the ``with`` block exits.
    """
    with config_context(enable_metadata_routing=True):
        yield
METAESTIMATORS: list = [
{
"metaestimator": MultiOutputRegressor,
"estimator_name": "estimator",
"estimator": "regressor",
"X": X,
"y": y_multi,
"estimator_routing_methods": ["fit", "partial_fit"],
},
{
"metaestimator": MultiOutputClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y_multi,
"estimator_routing_methods": ["fit", "partial_fit"],
"method_args": {"partial_fit": {"classes": classes_multi}},
},
{
"metaestimator": CalibratedClassifierCV,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
},
{
"metaestimator": ClassifierChain,
"estimator_name": "base_estimator",
"estimator": "classifier",
"X": X,
"y": y_multi,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": RegressorChain,
"estimator_name": "base_estimator",
"estimator": "regressor",
"X": X,
"y": y_multi,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": LogisticRegressionCV,
"X": X,
"y": y,
"scorer_name": "scoring",
"scorer_routing_methods": ["fit", "score"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": GridSearchCV,
"estimator_name": "estimator",
"estimator": "classifier",
"init_args": {"param_grid": {"alpha": [0.1, 0.2]}},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
"scorer_name": "scoring",
"scorer_routing_methods": ["fit", "score"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": RandomizedSearchCV,
"estimator_name": "estimator",
"estimator": "classifier",
"init_args": {"param_distributions": {"alpha": [0.1, 0.2]}},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
"scorer_name": "scoring",
"scorer_routing_methods": ["fit", "score"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": HalvingGridSearchCV,
"estimator_name": "estimator",
"estimator": "classifier",
"init_args": {"param_grid": {"alpha": [0.1, 0.2]}},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
"scorer_name": "scoring",
"scorer_routing_methods": ["fit", "score"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": HalvingRandomSearchCV,
"estimator_name": "estimator",
"estimator": "classifier",
"init_args": {"param_distributions": {"alpha": [0.1, 0.2]}},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
"scorer_name": "scoring",
"scorer_routing_methods": ["fit", "score"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": FixedThresholdClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y_binary,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
},
{
"metaestimator": TunedThresholdClassifierCV,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y_binary,
"estimator_routing_methods": ["fit"],
"preserves_metadata": "subset",
},
{
"metaestimator": OneVsRestClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"estimator_routing_methods": ["fit", "partial_fit"],
"method_args": {"partial_fit": {"classes": classes}},
},
{
"metaestimator": OneVsOneClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"estimator_routing_methods": ["fit", "partial_fit"],
"preserves_metadata": "subset",
"method_args": {"partial_fit": {"classes": classes}},
},
{
"metaestimator": OutputCodeClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"init_args": {"random_state": 42},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": SelectFromModel,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"estimator_routing_methods": ["fit", "partial_fit"],
"method_args": {"partial_fit": {"classes": classes}},
},
{
"metaestimator": OrthogonalMatchingPursuitCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": ElasticNetCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": LassoCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": MultiTaskElasticNetCV,
"X": X,
"y": y_multi,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": MultiTaskLassoCV,
"X": X,
"y": y_multi,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": LarsCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": LassoLarsCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": RANSACRegressor,
"estimator_name": "estimator",
"estimator": "regressor",
"init_args": {"min_samples": 0.5},
"X": X,
"y": y,
"preserves_metadata": "subset",
"estimator_routing_methods": ["fit", "predict", "score"],
"method_mapping": {"fit": ["fit", "score"]},
},
{
"metaestimator": IterativeImputer,
"estimator_name": "estimator",
"estimator": "regressor",
"init_args": {"skip_complete": False},
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": BaggingClassifier,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"preserves_metadata": False,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": BaggingRegressor,
"estimator_name": "estimator",
"estimator": "regressor",
"X": X,
"y": y,
"preserves_metadata": False,
"estimator_routing_methods": ["fit"],
},
{
"metaestimator": RidgeCV,
"X": X,
"y": y,
"scorer_name": "scoring",
"scorer_routing_methods": ["fit"],
},
{
"metaestimator": RidgeClassifierCV,
"X": X,
"y": y,
"scorer_name": "scoring",
"scorer_routing_methods": ["fit"],
},
{
"metaestimator": RidgeCV,
"X": X,
"y": y,
"scorer_name": "scoring",
"scorer_routing_methods": ["fit"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": RidgeClassifierCV,
"X": X,
"y": y,
"scorer_name": "scoring",
"scorer_routing_methods": ["fit"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
{
"metaestimator": GraphicalLassoCV,
"X": X,
"y": y,
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
]
"""List containing all metaestimators to be tested and their settings
The keys are as follows:
- metaestimator: The metaestimator to be tested
- estimator_name: The name of the argument for the sub-estimator
- estimator: The sub-estimator type, either "regressor" or "classifier"
- init_args: The arguments to be passed to the metaestimator's constructor
- X: X-data to fit and predict
- y: y-data to fit
- estimator_routing_methods: list of all methods to check for routing metadata
to the sub-estimator
- preserves_metadata:
- True (default): the metaestimator passes the metadata to the
sub-estimator without modification. We check that the values recorded by
the sub-estimator are identical to what we've passed to the
metaestimator.
- False: no check is performed regarding values, we only check that a
metadata with the expected names/keys are passed.
- "subset": we check that the recorded metadata by the sub-estimator is a
subset of what is passed to the metaestimator.
- scorer_name: The name of the argument for the scorer
- scorer_routing_methods: list of all methods to check for routing metadata
to the scorer
- cv_name: The name of the argument for the CV splitter
- cv_routing_methods: list of all methods to check for routing metadata
to the splitter
- method_args: a dict of dicts, defining extra arguments needed to be passed to
methods, such as passing `classes` to `partial_fit`.
- method_mapping: a dict of the form `{caller: [callee1, ...]}` which signals
which `.set_{method}_request` methods should be called to set request values.
If not present, a one-to-one mapping is assumed.
"""
# IDs used by pytest to get meaningful verbose messages when running the tests
METAESTIMATOR_IDS = [str(row["metaestimator"].__name__) for row in METAESTIMATORS]
UNSUPPORTED_ESTIMATORS = [
AdaBoostClassifier(),
AdaBoostRegressor(),
RFE(ConsumingClassifier()),
RFECV(ConsumingClassifier()),
SelfTrainingClassifier(ConsumingClassifier()),
SequentialFeatureSelector(ConsumingClassifier()),
StackingClassifier(ConsumingClassifier()),
StackingRegressor(ConsumingRegressor()),
TransformedTargetRegressor(),
]
def get_init_args(metaestimator_info, sub_estimator_consumes):
    """Get the init args for a metaestimator

    This is a helper function to get the init args for a metaestimator from
    the METAESTIMATORS list. It returns an empty dict if no init args are
    required.

    Parameters
    ----------
    metaestimator_info : dict
        The metaestimator info from METAESTIMATORS

    sub_estimator_consumes : bool
        Whether the sub-estimator consumes metadata or not.

    Returns
    -------
    kwargs : dict
        The init args for the metaestimator.

    (estimator, estimator_registry) : (estimator, registry)
        The sub-estimator and the corresponding registry.

    (scorer, scorer_registry) : (scorer, registry)
        The scorer and the corresponding registry.

    (cv, cv_registry) : (CV splitter, registry)
        The CV splitter and the corresponding registry.
    """
    # Copy `init_args` so that the insertions below do not mutate the shared
    # dict stored inside METAESTIMATORS. Previously the same dict object was
    # returned and then modified, leaking estimators/scorers/splitters from
    # one call (and hence one test) into the module-level test data.
    kwargs = dict(metaestimator_info.get("init_args", {}))
    estimator, estimator_registry = None, None
    scorer, scorer_registry = None, None
    cv, cv_registry = None, None
    if "estimator" in metaestimator_info:
        estimator_name = metaestimator_info["estimator_name"]
        estimator_registry = _Registry()
        sub_estimator_type = metaestimator_info["estimator"]
        if sub_estimator_type == "regressor":
            estimator = (
                ConsumingRegressor(estimator_registry)
                if sub_estimator_consumes
                else NonConsumingRegressor()
            )
        elif sub_estimator_type == "classifier":
            estimator = (
                ConsumingClassifier(estimator_registry)
                if sub_estimator_consumes
                else NonConsumingClassifier()
            )
        else:
            raise ValueError("Unpermitted `sub_estimator_type`.")  # pragma: nocover
        kwargs[estimator_name] = estimator
    if "scorer_name" in metaestimator_info:
        scorer_registry = _Registry()
        scorer = ConsumingScorer(registry=scorer_registry)
        kwargs[metaestimator_info["scorer_name"]] = scorer
    if "cv_name" in metaestimator_info:
        cv_registry = _Registry()
        cv = ConsumingSplitter(registry=cv_registry)
        kwargs[metaestimator_info["cv_name"]] = cv
    return (
        kwargs,
        (estimator, estimator_registry),
        (scorer, scorer_registry),
        (cv, cv_registry),
    )
def set_requests(estimator, *, method_mapping, methods, metadata_name, value=True):
    """Call `set_{method}_request` on a list of methods from the sub-estimator.

    Parameters
    ----------
    estimator : BaseEstimator
        The estimator for which `set_{method}_request` methods are called.

    method_mapping : dict
        The method mapping in the form of `{caller: [callee, ...]}`.
        If a "caller" is not present in the method mapping, a one-to-one
        mapping is assumed.

    methods : list of str
        The list of methods as "caller"s for which the request for the child
        should be set.

    metadata_name : str
        The name of the metadata to be routed, usually either `"metadata"` or
        `"sample_weight"` in our tests.

    value : None, bool, or str
        The request value to be set, by default it's `True`
    """
    for caller in methods:
        # Fall back to a one-to-one mapping when the caller is not mapped.
        callees = method_mapping.get(caller, [caller])
        for callee in callees:
            request_setter = getattr(estimator, f"set_{callee}_request")
            request_setter(**{metadata_name: value})
            if is_classifier(estimator) and callee == "partial_fit":
                # Classifiers also need `classes` routed to `partial_fit`.
                request_setter(classes=True)
@pytest.mark.parametrize("estimator", UNSUPPORTED_ESTIMATORS)
def test_unsupported_estimators_get_metadata_routing(estimator):
    """Test that get_metadata_routing is not implemented on meta-estimators for
    which we haven't implemented routing yet."""
    # NotImplementedError (not AttributeError) is the documented contract for
    # not-yet-routed meta-estimators.
    with pytest.raises(NotImplementedError):
        estimator.get_metadata_routing()
@pytest.mark.parametrize("estimator", UNSUPPORTED_ESTIMATORS)
def test_unsupported_estimators_fit_with_metadata(estimator):
    """Test that fit raises NotImplementedError when metadata routing is
    enabled and a metadata is passed on meta-estimators for which we haven't
    implemented routing yet."""
    with pytest.raises(NotImplementedError):
        try:
            estimator.fit([[1]], [1], sample_weight=[1])
        except TypeError:
            # not all meta-estimators in the list support sample_weight,
            # and for those we skip this test.
            # Re-raising as NotImplementedError satisfies the enclosing
            # pytest.raises, effectively skipping the check for them.
            raise NotImplementedError
def test_registry_copy():
    """Copying a _Registry must return the very same object (identity)."""
    first = _Registry()
    second = _Registry()
    # Distinct constructions stay distinct objects...
    assert first is not second
    # ...but shallow and deep copies are the registry itself, so that
    # registries shared with sub-estimators are never silently duplicated.
    assert copy.copy(first) is first
    assert copy.deepcopy(first) is first
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_default_request(metaestimator):
    # Check that by default request is empty and the right type
    cls = metaestimator["metaestimator"]
    kwargs, *_ = get_init_args(metaestimator, sub_estimator_consumes=True)
    instance = cls(**kwargs)
    if "cv_name" in metaestimator:
        # Our GroupCV splitters request groups by default, which we should
        # ignore in this test.
        exclude = {"splitter": ["split"]}
    else:
        exclude = None
    assert_request_is_empty(instance.get_metadata_routing(), exclude=exclude)
    assert isinstance(instance.get_metadata_routing(), MetadataRouter)
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_error_on_missing_requests_for_sub_estimator(metaestimator):
    # Test that a UnsetMetadataPassedError is raised when the sub-estimator's
    # requests are not set
    if "estimator" not in metaestimator:
        # This test only makes sense for metaestimators which have a
        # sub-estimator, e.g. MyMetaEstimator(estimator=MySubEstimator())
        return
    cls = metaestimator["metaestimator"]
    X = metaestimator["X"]
    y = metaestimator["y"]
    routing_methods = metaestimator["estimator_routing_methods"]
    for method_name in routing_methods:
        for key in ["sample_weight", "metadata"]:
            # Fresh instances per (method, metadata) combination so request
            # state never leaks between iterations.
            kwargs, (estimator, _), (scorer, _), *_ = get_init_args(
                metaestimator, sub_estimator_consumes=True
            )
            if scorer:
                scorer.set_score_request(**{key: True})
            val = {"sample_weight": sample_weight, "metadata": metadata}[key]
            method_kwargs = {key: val}
            instance = cls(**kwargs)
            msg = (
                f"[{key}] are passed but are not explicitly set as requested or not"
                f" requested for {estimator.__class__.__name__}.{method_name}"
            )
            with pytest.raises(UnsetMetadataPassedError, match=re.escape(msg)):
                method = getattr(instance, method_name)
                if "fit" not in method_name:
                    # set request on fit
                    set_requests(
                        estimator,
                        method_mapping=metaestimator.get("method_mapping", {}),
                        methods=["fit"],
                        metadata_name=key,
                    )
                    instance.fit(X, y, **method_kwargs)
                    # making sure the requests are unset, in case they were set as a
                    # side effect of setting them for fit. For instance, if method
                    # mapping for fit is: `"fit": ["fit", "score"]`, that would mean
                    # calling `.score` here would not raise, because we have already
                    # set request value for child estimator's `score`.
                    set_requests(
                        estimator,
                        method_mapping=metaestimator.get("method_mapping", {}),
                        methods=["fit"],
                        metadata_name=key,
                        value=None,
                    )
                try:
                    # `fit` and `partial_fit` accept y, others don't.
                    method(X, y, **method_kwargs)
                except TypeError:
                    method(X, **method_kwargs)
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_setting_request_on_sub_estimator_removes_error(metaestimator):
    # When the metadata is explicitly requested on the sub-estimator, there
    # should be no errors.
    if "estimator" not in metaestimator:
        # This test only makes sense for metaestimators which have a
        # sub-estimator, e.g. MyMetaEstimator(estimator=MySubEstimator())
        return
    cls = metaestimator["metaestimator"]
    X = metaestimator["X"]
    y = metaestimator["y"]
    routing_methods = metaestimator["estimator_routing_methods"]
    method_mapping = metaestimator.get("method_mapping", {})
    preserves_metadata = metaestimator.get("preserves_metadata", True)
    for method_name in routing_methods:
        for key in ["sample_weight", "metadata"]:
            val = {"sample_weight": sample_weight, "metadata": metadata}[key]
            method_kwargs = {key: val}
            kwargs, (estimator, registry), (scorer, _), (cv, _) = get_init_args(
                metaestimator, sub_estimator_consumes=True
            )
            if scorer:
                set_requests(
                    scorer, method_mapping={}, methods=["score"], metadata_name=key
                )
            if cv:
                cv.set_split_request(groups=True, metadata=True)
            # `set_{method}_request({metadata}==True)` on the underlying objects
            set_requests(
                estimator,
                method_mapping=method_mapping,
                methods=[method_name],
                metadata_name=key,
            )
            instance = cls(**kwargs)
            method = getattr(instance, method_name)
            extra_method_args = metaestimator.get("method_args", {}).get(
                method_name, {}
            )
            if "fit" not in method_name:
                # fit before calling method
                set_requests(
                    estimator,
                    method_mapping=metaestimator.get("method_mapping", {}),
                    methods=["fit"],
                    metadata_name=key,
                )
                instance.fit(X, y, **method_kwargs, **extra_method_args)
            try:
                # `fit` and `partial_fit` accept y, others don't.
                method(X, y, **method_kwargs, **extra_method_args)
            except TypeError:
                method(X, **method_kwargs, **extra_method_args)
            # sanity check that registry is not empty, or else the test passes
            # trivially
            assert registry
            # Verify that the sub-estimator actually received the metadata,
            # with strictness depending on `preserves_metadata` (see the
            # METAESTIMATORS docstring).
            if preserves_metadata is True:
                for estimator in registry:
                    check_recorded_metadata(estimator, method_name, **method_kwargs)
            elif preserves_metadata == "subset":
                for estimator in registry:
                    check_recorded_metadata(
                        estimator,
                        method_name,
                        split_params=method_kwargs.keys(),
                        **method_kwargs,
                    )
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_non_consuming_estimator_works(metaestimator):
    # Test that when a non-consuming estimator is given, the meta-estimator
    # works w/o setting any requests.
    # Regression test for https://github.com/scikit-learn/scikit-learn/issues/28239
    if "estimator" not in metaestimator:
        # This test only makes sense for metaestimators which have a
        # sub-estimator, e.g. MyMetaEstimator(estimator=MySubEstimator())
        return
    def set_request(estimator, method_name):
        # e.g. call set_fit_request on estimator
        # Only `classes` (mandatory for classifier.partial_fit) is requested;
        # no metadata requests are set, which is the point of this test.
        if is_classifier(estimator) and method_name == "partial_fit":
            estimator.set_partial_fit_request(classes=True)
    cls = metaestimator["metaestimator"]
    X = metaestimator["X"]
    y = metaestimator["y"]
    routing_methods = metaestimator["estimator_routing_methods"]
    for method_name in routing_methods:
        kwargs, (estimator, _), (_, _), (_, _) = get_init_args(
            metaestimator, sub_estimator_consumes=False
        )
        instance = cls(**kwargs)
        set_request(estimator, method_name)
        method = getattr(instance, method_name)
        extra_method_args = metaestimator.get("method_args", {}).get(method_name, {})
        if "fit" not in method_name:
            instance.fit(X, y, **extra_method_args)
        # The following should pass w/o raising a routing error.
        try:
            # `fit` and `partial_fit` accept y, others don't.
            method(X, y, **extra_method_args)
        except TypeError:
            method(X, **extra_method_args)
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_metadata_is_routed_correctly_to_scorer(metaestimator):
    """Test that any requested metadata is correctly routed to the underlying
    scorers in CV estimators.
    """
    if "scorer_name" not in metaestimator:
        # This test only makes sense for CV estimators
        return
    cls = metaestimator["metaestimator"]
    routing_methods = metaestimator["scorer_routing_methods"]
    for method_name in routing_methods:
        kwargs, (estimator, _), (scorer, registry), (cv, _) = get_init_args(
            metaestimator, sub_estimator_consumes=True
        )
        # Request metadata everywhere it could be consumed so the routing to
        # the scorer is not blocked by an UnsetMetadataPassedError elsewhere.
        if estimator:
            estimator.set_fit_request(sample_weight=True, metadata=True)
        scorer.set_score_request(sample_weight=True)
        if cv:
            cv.set_split_request(groups=True, metadata=True)
        instance = cls(**kwargs)
        method = getattr(instance, method_name)
        method_kwargs = {"sample_weight": sample_weight}
        if "fit" not in method_name:
            instance.fit(X, y)
        method(X, y, **method_kwargs)
        # Guard against the check below passing trivially.
        assert registry
        for _scorer in registry:
            check_recorded_metadata(
                obj=_scorer,
                method="score",
                split_params=("sample_weight",),
                **method_kwargs,
            )
@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS)
def test_metadata_is_routed_correctly_to_splitter(metaestimator):
    """Test that any requested metadata is correctly routed to the underlying
    splitters in CV estimators.
    """
    if "cv_routing_methods" not in metaestimator:
        # This test is only for metaestimators accepting a CV splitter
        return
    cls = metaestimator["metaestimator"]
    routing_methods = metaestimator["cv_routing_methods"]
    X_ = metaestimator["X"]
    y_ = metaestimator["y"]
    for method_name in routing_methods:
        kwargs, (estimator, _), (scorer, _), (cv, registry) = get_init_args(
            metaestimator, sub_estimator_consumes=True
        )
        # Explicitly opt the estimator and scorer out so that only the
        # splitter consumes the routed metadata.
        if estimator:
            estimator.set_fit_request(sample_weight=False, metadata=False)
        if scorer:
            scorer.set_score_request(sample_weight=False, metadata=False)
        cv.set_split_request(groups=True, metadata=True)
        instance = cls(**kwargs)
        method_kwargs = {"groups": groups, "metadata": metadata}
        method = getattr(instance, method_name)
        method(X_, y_, **method_kwargs)
        # Guard against the check below passing trivially.
        assert registry
        for _splitter in registry:
            check_recorded_metadata(obj=_splitter, method="split", **method_kwargs)
@@ -0,0 +1,137 @@
"""Tests for the minimum dependencies in README.rst and pyproject.toml"""
import os
import platform
import re
from collections import defaultdict
from pathlib import Path
import pytest
import sklearn
from sklearn._min_dependencies import dependent_packages
from sklearn.utils.fixes import parse_version
# Map each extras tag (e.g. "build", "docs") to the packages listed under it
# in sklearn/_min_dependencies.py.
# NOTE(review): the name carries a "depencies" typo; it is referenced
# throughout this module, so renaming would need a coordinated change.
min_depencies_tag_to_packages_without_version = defaultdict(list)
for package, (min_version, extras) in dependent_packages.items():
    for extra in extras.split(", "):
        min_depencies_tag_to_packages_without_version[extra].append(package)
# Map each dependency tag to the pyproject.toml section holding its pins;
# "build" and "install" are special-cased, the rest are optional-dependencies.
min_dependencies_tag_to_pyproject_section = {
    "build": "build-system.requires",
    "install": "project.dependencies",
}
for tag in min_depencies_tag_to_packages_without_version:
    min_dependencies_tag_to_pyproject_section[tag] = (
        f"project.optional-dependencies.{tag}"
    )
def test_min_dependencies_readme():
    # Test that the minimum dependencies in the README.rst file are
    # consistent with the minimum dependencies defined at the file:
    # sklearn/_min_dependencies.py
    if platform.python_implementation() == "PyPy":
        pytest.skip("PyPy does not always share the same minimum deps")
    # Matches reST substitution definitions of the form
    # ".. |PackageMinVersion| replace:: X.Y[.Z]"; group(2) is the package
    # name, group(5) the version string (with a leading space).
    pattern = re.compile(
        r"(\.\. \|)"
        + r"(([A-Za-z]+\-?)+)"
        + r"(MinVersion\| replace::)"
        + r"( [0-9]+\.[0-9]+(\.[0-9]+)?)"
    )
    readme_path = Path(sklearn.__file__).parent.parent
    readme_file = readme_path / "README.rst"
    if not os.path.exists(readme_file):
        # Skip the test if the README.rst file is not available.
        # For instance, when installing scikit-learn from wheels
        pytest.skip("The README.rst file is not available.")
    with readme_file.open("r") as f:
        for line in f:
            matched = pattern.match(line)
            if not matched:
                continue
            package, version = matched.group(2), matched.group(5)
            package = package.lower()
            if package in dependent_packages:
                version = parse_version(version)
                min_version = parse_version(dependent_packages[package][0])
                assert version == min_version, f"{package} has a mismatched version"
def check_pyproject_section(
    pyproject_section, min_dependencies_tag, skip_version_check_for=None
):
    """Assert a pyproject.toml section matches sklearn/_min_dependencies.py.

    Parameters
    ----------
    pyproject_section : str
        Dotted path of the section inside pyproject.toml, e.g.
        "build-system.requires".
    min_dependencies_tag : str
        Extras tag whose packages are expected in that section.
    skip_version_check_for : list of str, default=None
        Packages whose version pin is not compared (their presence is still
        checked).
    """
    # tomllib is available in Python 3.11
    tomllib = pytest.importorskip("tomllib")
    if skip_version_check_for is None:
        skip_version_check_for = []
    expected_packages = min_depencies_tag_to_packages_without_version[
        min_dependencies_tag
    ]
    root_directory = Path(sklearn.__file__).parent.parent
    pyproject_toml_path = root_directory / "pyproject.toml"
    if not pyproject_toml_path.exists():
        # Skip the test if the pyproject.toml file is not available.
        # For instance, when installing scikit-learn from wheels
        pytest.skip("pyproject.toml is not available.")
    with pyproject_toml_path.open("rb") as f:
        pyproject_toml = tomllib.load(f)
    # Walk the dotted path down to the list of requirement strings.
    pyproject_section_keys = pyproject_section.split(".")
    info = pyproject_toml
    for key in pyproject_section_keys:
        info = info[key]
    # Parse "package>=x.y" / "package==x.y" requirement strings.
    pyproject_build_min_versions = {}
    for requirement in info:
        if ">=" in requirement:
            package, version = requirement.split(">=")
        elif "==" in requirement:
            package, version = requirement.split("==")
        else:
            raise NotImplementedError(
                f"{requirement} not supported yet in this test. "
                "Only >= and == are supported for version requirements"
            )
        pyproject_build_min_versions[package] = version
    # Same package set first, then matching minimum versions (unless skipped).
    assert sorted(pyproject_build_min_versions) == sorted(expected_packages)
    for package, version in pyproject_build_min_versions.items():
        version = parse_version(version)
        expected_min_version = parse_version(dependent_packages[package][0])
        if package in skip_version_check_for:
            continue
        assert version == expected_min_version, f"{package} has a mismatched version"
@pytest.mark.parametrize(
    "min_dependencies_tag, pyproject_section",
    min_dependencies_tag_to_pyproject_section.items(),
)
def test_min_dependencies_pyproject_toml(pyproject_section, min_dependencies_tag):
    """Check versions in pyproject.toml is consistent with _min_dependencies."""
    # NumPy is more complex because build-time (>=1.25) and run-time (>=1.19.5)
    # requirement currently don't match
    skip_version_check_for = ["numpy"] if min_dependencies_tag == "build" else None
    check_pyproject_section(
        pyproject_section,
        min_dependencies_tag,
        skip_version_check_for=skip_version_check_for,
    )
@@ -0,0 +1,948 @@
from re import escape
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_allclose
from sklearn import datasets, svm
from sklearn.datasets import load_breast_cancer
from sklearn.exceptions import NotFittedError
from sklearn.impute import SimpleImputer
from sklearn.linear_model import (
ElasticNet,
Lasso,
LinearRegression,
LogisticRegression,
Perceptron,
Ridge,
SGDClassifier,
)
from sklearn.metrics import precision_score, recall_score
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.multiclass import (
OneVsOneClassifier,
OneVsRestClassifier,
OutputCodeClassifier,
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils import (
check_array,
shuffle,
)
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import assert_almost_equal, assert_array_equal
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
from sklearn.utils.multiclass import check_classification_targets, type_of_target
# Silence the pending `force_alpha` default-change FutureWarning for the whole
# module (MultinomialNB is used pervasively below).
msg = "The default value for `force_alpha` will change"
pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")
# Shared fixture data: a deterministically shuffled copy of the iris dataset.
iris = datasets.load_iris()
rng = np.random.RandomState(0)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
n_classes = 3  # iris has three classes
def test_ovr_exceptions():
    """OvR raises on unfitted predict and on multioutput targets."""
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    # Predicting before fitting must raise.
    with pytest.raises(NotFittedError):
        clf.predict([])
    # Multioutput targets (integer or continuous) are rejected at fit time.
    err = "Multioutput target data is not supported with label binarization"
    X = np.array([[1, 0], [0, 1]])
    for bad_y in (
        np.array([[1, 2], [3, 1]]),
        np.array([[1.5, 2.4], [3.1, 0.8]]),
    ):
        with pytest.raises(ValueError, match=err):
            OneVsRestClassifier(MultinomialNB()).fit(X, bad_y)
def test_check_classification_targets():
    """check_classification_targets rejects continuous targets (#5782)."""
    continuous_y = np.array([0.0, 1.1, 2.0, 3.0])
    # The raised message contains the detected target type.
    detected_type = type_of_target(continuous_y)
    with pytest.raises(ValueError, match=detected_type):
        check_classification_targets(continuous_y)
def test_ovr_fit_predict():
    """OvR supports decision_function and predict_proba base estimators."""
    # A classifier which implements decision_function: the OvR wrapper reaches
    # the same accuracy as the bare multiclass LinearSVC on iris.
    ovr_svc = OneVsRestClassifier(LinearSVC(random_state=0))
    wrapped_pred = ovr_svc.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr_svc.estimators_) == n_classes
    bare = LinearSVC(random_state=0)
    bare_pred = bare.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == wrapped_pred) == np.mean(iris.target == bare_pred)
    # A classifier which implements predict_proba.
    ovr_nb = OneVsRestClassifier(MultinomialNB())
    nb_pred = ovr_nb.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == nb_pred) > 0.65
def test_ovr_partial_fit():
    """partial_fit on OvR matches a single full fit."""
    # Test if partial_fit is working as intended
    X, y = shuffle(iris.data, iris.target, random_state=0)
    ovr = OneVsRestClassifier(MultinomialNB())
    ovr.partial_fit(X[:100], y[:100], np.unique(y))
    ovr.partial_fit(X[100:], y[100:])
    pred = ovr.predict(X)
    ovr2 = OneVsRestClassifier(MultinomialNB())
    pred2 = ovr2.fit(X, y).predict(X)
    assert_almost_equal(pred, pred2)
    assert len(ovr.estimators_) == len(np.unique(y))
    assert np.mean(y == pred) > 0.65
    # Test when mini batches doesn't have all classes
    # with SGDClassifier
    # NOTE(review): draws from the global numpy RNG, so X differs between runs.
    X = np.abs(np.random.randn(14, 2))
    y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3]
    ovr = OneVsRestClassifier(
        SGDClassifier(max_iter=1, tol=None, shuffle=False, random_state=0)
    )
    ovr.partial_fit(X[:7], y[:7], np.unique(y))
    ovr.partial_fit(X[7:], y[7:])
    pred = ovr.predict(X)
    ovr1 = OneVsRestClassifier(
        SGDClassifier(max_iter=1, tol=None, shuffle=False, random_state=0)
    )
    pred1 = ovr1.fit(X, y).predict(X)
    assert np.mean(pred == y) == np.mean(pred1 == y)
    # test partial_fit only exists if estimator has it:
    ovr = OneVsRestClassifier(SVC())
    assert not hasattr(ovr, "partial_fit")
def test_ovr_partial_fit_exceptions():
    """partial_fit rejects classes unseen in the first call."""
    ovr = OneVsRestClassifier(MultinomialNB())
    # NOTE(review): draws from the global numpy RNG (unseeded).
    X = np.abs(np.random.randn(14, 2))
    y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3]
    ovr.partial_fit(X[:7], y[:7], np.unique(y))
    # If a new class that was not in the first call of partial fit is seen
    # it should raise ValueError
    y1 = [5] + y[7:-1]
    msg = r"Mini-batch contains \[.+\] while classes must be subset of \[.+\]"
    with pytest.raises(ValueError, match=msg):
        ovr.partial_fit(X=X[7:], y=y1)
def test_ovr_ovo_regressor():
    """OvR/OvO accept regressors that lack decision_function."""
    # test that ovr and ovo work on regressors which don't have a decision_
    # function
    ovr = OneVsRestClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > 0.9
    ovr = OneVsOneClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > 0.9
@pytest.mark.parametrize(
    "sparse_container",
    CSR_CONTAINERS + CSC_CONTAINERS + COO_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
def test_ovr_fit_predict_sparse(sparse_container):
    """Sparse multilabel targets give the same predictions as dense ones."""
    base_clf = MultinomialNB(alpha=1)
    X, Y = datasets.make_multilabel_classification(
        n_samples=100,
        n_features=20,
        n_classes=5,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    X_train, Y_train = X[:80], Y[:80]
    X_test = X[80:]
    clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
    Y_pred = clf.predict(X_test)
    # Fitting on a sparse target must produce a sparse prediction that matches
    # the dense one.
    clf_sprs = OneVsRestClassifier(base_clf).fit(X_train, sparse_container(Y_train))
    Y_pred_sprs = clf_sprs.predict(X_test)
    assert clf.multilabel_
    assert sp.issparse(Y_pred_sprs)
    assert_array_equal(Y_pred_sprs.toarray(), Y_pred)
    # Test predict_proba
    Y_proba = clf_sprs.predict_proba(X_test)
    # predict assigns a label if the probability that the
    # sample has the label is greater than 0.5.
    pred = Y_proba > 0.5
    assert_array_equal(pred, Y_pred_sprs.toarray())
    # Test decision_function
    clf = svm.SVC()
    clf_sprs = OneVsRestClassifier(clf).fit(X_train, sparse_container(Y_train))
    dec_pred = (clf_sprs.decision_function(X_test) > 0).astype(int)
    assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray())
def test_ovr_always_present():
    """OvR warns on, but handles, labels present in every sample."""
    # Test that ovr works with classes that are always present or absent.
    # Note: tests is the case where _ConstantPredictor is utilised
    X = np.ones((10, 2))
    X[:5, :] = 0
    # Build an indicator matrix where two features are always on.
    # As list of lists, it would be: [[int(i >= 5), 2, 3] for i in range(10)]
    y = np.zeros((10, 3))
    y[5:, 0] = 1
    y[:, 1] = 1
    y[:, 2] = 1
    ovr = OneVsRestClassifier(LogisticRegression())
    msg = r"Label .+ is present in all training examples"
    with pytest.warns(UserWarning, match=msg):
        ovr.fit(X, y)
    y_pred = ovr.predict(X)
    assert_array_equal(np.array(y_pred), np.array(y))
    y_pred = ovr.decision_function(X)
    # The two constant labels get a constant decision value of 1.
    assert np.unique(y_pred[:, -2:]) == 1
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.ones(X.shape[0]))
    # y has a constantly absent label
    y = np.zeros((10, 2))
    y[5:, 0] = 1  # variable label
    ovr = OneVsRestClassifier(LogisticRegression())
    msg = r"Label not 1 is present in all training examples"
    with pytest.warns(UserWarning, match=msg):
        ovr.fit(X, y)
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
def test_ovr_multiclass():
    """OvR handles string labels and label-indicator targets alike."""
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
    y = ["eggs", "spam", "ham", "eggs", "ham"]
    Y = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0]])
    classes = set("ham eggs spam".split())
    # Check a range of base estimators, including plain regressors.
    for base_clf in (
        MultinomialNB(),
        LinearSVC(random_state=0),
        LinearRegression(),
        Ridge(),
        ElasticNet(),
    ):
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert set(clf.classes_) == classes
        y_pred = clf.predict(np.array([[0, 0, 4]]))[0]
        assert_array_equal(y_pred, ["eggs"])
        # test input as label indicator matrix
        clf = OneVsRestClassifier(base_clf).fit(X, Y)
        y_pred = clf.predict([[0, 0, 4]])[0]
        assert_array_equal(y_pred, [0, 0, 1])
def test_ovr_binary():
    """OvR on a binary problem with string labels and an indicator column."""
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
    y = ["eggs", "spam", "spam", "eggs", "spam"]
    Y = np.array([[0, 1, 1, 0, 1]]).T
    classes = set("eggs spam".split())

    def conduct_test(base_clf, test_predict_proba=False):
        # Fit on string labels; check classes, prediction, and (when exposed)
        # decision_function shape and predict_proba/predict agreement.
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert set(clf.classes_) == classes
        y_pred = clf.predict(np.array([[0, 0, 4]]))[0]
        assert_array_equal(y_pred, ["eggs"])
        if hasattr(base_clf, "decision_function"):
            dec = clf.decision_function(X)
            assert dec.shape == (5,)
        if test_predict_proba:
            X_test = np.array([[0, 0, 4]])
            probabilities = clf.predict_proba(X_test)
            assert 2 == len(probabilities[0])
            assert clf.classes_[np.argmax(probabilities, axis=1)] == clf.predict(X_test)
        # test input as label indicator matrix
        clf = OneVsRestClassifier(base_clf).fit(X, Y)
        y_pred = clf.predict([[3, 0, 0]])[0]
        assert y_pred == 1

    for base_clf in (
        LinearSVC(random_state=0),
        LinearRegression(),
        Ridge(),
        ElasticNet(),
    ):
        conduct_test(base_clf)
    # Estimators exposing predict_proba get the extra probability checks.
    for base_clf in (MultinomialNB(), SVC(probability=True), LogisticRegression()):
        conduct_test(base_clf, test_predict_proba=True)
def test_ovr_multilabel():
    """Each base estimator type predicts the expected multilabel row."""
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 4, 5], [0, 5, 0], [3, 3, 3], [4, 0, 6], [6, 0, 0]])
    y = np.array([[0, 1, 1], [0, 1, 0], [1, 1, 1], [1, 0, 1], [1, 0, 0]])
    for base_clf in (
        MultinomialNB(),
        LinearSVC(random_state=0),
        LinearRegression(),
        Ridge(),
        ElasticNet(),
        Lasso(alpha=0.5),
    ):
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        y_pred = clf.predict([[0, 4, 4]])[0]
        assert_array_equal(y_pred, [0, 1, 1])
        assert clf.multilabel_
def test_ovr_fit_predict_svc():
    """OvR with SVC fits one binary estimator per class and scores well."""
    model = OneVsRestClassifier(svm.SVC())
    model.fit(iris.data, iris.target)
    assert len(model.estimators_) == 3
    assert model.score(iris.data, iris.target) > 0.9
def test_ovr_multilabel_dataset():
    """Micro-averaged precision/recall match known reference values."""
    base_clf = MultinomialNB(alpha=1)
    # Expected scores differ depending on whether unlabeled rows are allowed.
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(
            n_samples=100,
            n_features=20,
            n_classes=5,
            n_labels=2,
            length=50,
            allow_unlabeled=au,
            random_state=0,
        )
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)
        assert clf.multilabel_
        assert_almost_equal(
            precision_score(Y_test, Y_pred, average="micro"), prec, decimal=2
        )
        assert_almost_equal(
            recall_score(Y_test, Y_pred, average="micro"), recall, decimal=2
        )
def test_ovr_multilabel_predict_proba():
    """predict_proba availability follows the base estimator; >0.5 == predict."""
    base_clf = MultinomialNB(alpha=1)
    for au in (False, True):
        X, Y = datasets.make_multilabel_classification(
            n_samples=100,
            n_features=20,
            n_classes=5,
            n_labels=3,
            length=50,
            allow_unlabeled=au,
            random_state=0,
        )
        X_train, Y_train = X[:80], Y[:80]
        X_test = X[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        # Decision function only estimator.
        decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
        assert not hasattr(decision_only, "predict_proba")
        # Estimator with predict_proba disabled, depending on parameters.
        decision_only = OneVsRestClassifier(svm.SVC(probability=False))
        assert not hasattr(decision_only, "predict_proba")
        decision_only.fit(X_train, Y_train)
        assert not hasattr(decision_only, "predict_proba")
        assert hasattr(decision_only, "decision_function")
        # Estimator which can get predict_proba enabled after fitting
        gs = GridSearchCV(
            svm.SVC(probability=False), param_grid={"probability": [True]}
        )
        proba_after_fit = OneVsRestClassifier(gs)
        assert not hasattr(proba_after_fit, "predict_proba")
        proba_after_fit.fit(X_train, Y_train)
        assert hasattr(proba_after_fit, "predict_proba")
        Y_pred = clf.predict(X_test)
        Y_proba = clf.predict_proba(X_test)
        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        pred = Y_proba > 0.5
        assert_array_equal(pred, Y_pred)
def test_ovr_single_label_predict_proba():
    """Single-label probabilities sum to one and their argmax matches predict."""
    base_clf = MultinomialNB(alpha=1)
    X, Y = iris.data, iris.target
    X_train, Y_train = X[:80], Y[:80]
    X_test = X[80:]
    clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
    # Decision function only estimator.
    decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
    assert not hasattr(decision_only, "predict_proba")
    Y_pred = clf.predict(X_test)
    Y_proba = clf.predict_proba(X_test)
    assert_almost_equal(Y_proba.sum(axis=1), 1.0)
    # predict assigns a label if the probability that the
    # sample has the label with the greatest predictive probability.
    pred = Y_proba.argmax(axis=1)
    assert not (pred - Y_pred).any()
def test_ovr_multilabel_decision_function():
    """Sign of the multilabel decision_function matches predict."""
    X, Y = datasets.make_multilabel_classification(
        n_samples=100,
        n_features=20,
        n_classes=5,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    X_train, Y_train = X[:80], Y[:80]
    X_test = X[80:]
    clf = OneVsRestClassifier(svm.SVC()).fit(X_train, Y_train)
    assert_array_equal(
        (clf.decision_function(X_test) > 0).astype(int), clf.predict(X_test)
    )
def test_ovr_single_label_decision_function():
    """For binary targets the decision_function sign agrees with predict."""
    X, y = datasets.make_classification(n_samples=100, n_features=20, random_state=0)
    model = OneVsRestClassifier(svm.SVC()).fit(X[:80], y[:80])
    X_eval = X[80:]
    positive = model.decision_function(X_eval).ravel() > 0
    assert_array_equal(positive, model.predict(X_eval))
def test_ovr_gridsearch():
    """Grid search over the inner estimator's C selects one candidate value."""
    candidate_Cs = [0.1, 0.5, 0.8]
    search = GridSearchCV(
        OneVsRestClassifier(LinearSVC(random_state=0)),
        {"estimator__C": candidate_Cs},
    )
    search.fit(iris.data, iris.target)
    assert search.best_estimator_.estimators_[0].C in candidate_Cs
def test_ovr_pipeline():
    """A one-step pipeline behaves like the bare estimator inside OvR."""
    # Test with pipeline of length one
    # This test is needed because the multiclass estimators may fail to detect
    # the presence of predict_proba or decision_function.
    clf = Pipeline([("tree", DecisionTreeClassifier())])
    ovr_pipe = OneVsRestClassifier(clf)
    ovr_pipe.fit(iris.data, iris.target)
    ovr = OneVsRestClassifier(DecisionTreeClassifier())
    ovr.fit(iris.data, iris.target)
    assert_array_equal(ovr.predict(iris.data), ovr_pipe.predict(iris.data))
def test_ovo_exceptions():
    """Predicting with an unfitted OvO classifier raises NotFittedError."""
    unfitted = OneVsOneClassifier(LinearSVC(random_state=0))
    with pytest.raises(NotFittedError):
        unfitted.predict([])
def test_ovo_fit_on_list():
    """Fitting on list inputs matches fitting on arrays."""
    # Test that OneVsOne fitting works with a list of targets and yields the
    # same output as predict from an array
    ovo = OneVsOneClassifier(LinearSVC(random_state=0))
    prediction_from_array = ovo.fit(iris.data, iris.target).predict(iris.data)
    iris_data_list = [list(a) for a in iris.data]
    prediction_from_list = ovo.fit(iris_data_list, list(iris.target)).predict(
        iris_data_list
    )
    assert_array_equal(prediction_from_array, prediction_from_list)
def test_ovo_fit_predict():
    """OvO fits n_classes * (n_classes - 1) / 2 binary estimators."""
    expected_count = n_classes * (n_classes - 1) / 2
    # A classifier which implements decision_function.
    ovo_svc = OneVsOneClassifier(LinearSVC(random_state=0))
    ovo_svc.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovo_svc.estimators_) == expected_count
    # A classifier which implements predict_proba.
    ovo_nb = OneVsOneClassifier(MultinomialNB())
    ovo_nb.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovo_nb.estimators_) == expected_count
def test_ovo_partial_fit_predict():
    """partial_fit on OvO matches a single full fit, incl. partial batches."""
    temp = datasets.load_iris()
    X, y = temp.data, temp.target
    ovo1 = OneVsOneClassifier(MultinomialNB())
    ovo1.partial_fit(X[:100], y[:100], np.unique(y))
    ovo1.partial_fit(X[100:], y[100:])
    pred1 = ovo1.predict(X)
    ovo2 = OneVsOneClassifier(MultinomialNB())
    ovo2.fit(X, y)
    pred2 = ovo2.predict(X)
    assert len(ovo1.estimators_) == n_classes * (n_classes - 1) / 2
    assert np.mean(y == pred1) > 0.65
    assert_almost_equal(pred1, pred2)
    # Test when mini-batches have binary target classes
    ovo1 = OneVsOneClassifier(MultinomialNB())
    ovo1.partial_fit(X[:60], y[:60], np.unique(y))
    ovo1.partial_fit(X[60:], y[60:])
    pred1 = ovo1.predict(X)
    ovo2 = OneVsOneClassifier(MultinomialNB())
    pred2 = ovo2.fit(X, y).predict(X)
    assert_almost_equal(pred1, pred2)
    assert len(ovo1.estimators_) == len(np.unique(y))
    assert np.mean(y == pred1) > 0.65
    # Five-class problem fed in two mini-batches.
    # NOTE(review): draws from the global numpy RNG, so X differs between runs.
    ovo = OneVsOneClassifier(MultinomialNB())
    X = np.random.rand(14, 2)
    y = [1, 1, 2, 3, 3, 0, 0, 4, 4, 4, 4, 4, 2, 2]
    ovo.partial_fit(X[:7], y[:7], [0, 1, 2, 3, 4])
    ovo.partial_fit(X[7:], y[7:])
    pred = ovo.predict(X)
    ovo2 = OneVsOneClassifier(MultinomialNB())
    pred2 = ovo2.fit(X, y).predict(X)
    assert_almost_equal(pred, pred2)
    # raises error when mini-batch does not have classes from all_classes
    ovo = OneVsOneClassifier(MultinomialNB())
    error_y = [0, 1, 2, 3, 4, 5, 2]
    message_re = escape(
        "Mini-batch contains {0} while it must be subset of {1}".format(
            np.unique(error_y), np.unique(y)
        )
    )
    with pytest.raises(ValueError, match=message_re):
        ovo.partial_fit(X[:7], error_y, np.unique(y))
    # test partial_fit only exists if estimator has it:
    ovr = OneVsOneClassifier(SVC())
    assert not hasattr(ovr, "partial_fit")
def test_ovo_decision_function():
    """OvO decision values encode votes plus normalized confidences."""
    n_samples = iris.data.shape[0]
    ovo_clf = OneVsOneClassifier(LinearSVC(random_state=0))
    # first binary
    ovo_clf.fit(iris.data, iris.target == 0)
    decisions = ovo_clf.decision_function(iris.data)
    assert decisions.shape == (n_samples,)
    # then multi-class
    ovo_clf.fit(iris.data, iris.target)
    decisions = ovo_clf.decision_function(iris.data)
    assert decisions.shape == (n_samples, n_classes)
    assert_array_equal(decisions.argmax(axis=1), ovo_clf.predict(iris.data))
    # Compute the votes: estimators_ are ordered over pairs (i, j) with i < j,
    # each predicting 0 for class i and 1 for class j.
    votes = np.zeros((n_samples, n_classes))
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pred = ovo_clf.estimators_[k].predict(iris.data)
            votes[pred == 0, i] += 1
            votes[pred == 1, j] += 1
            k += 1
    # Extract votes and verify
    assert_array_equal(votes, np.round(decisions))
    for class_idx in range(n_classes):
        # For each sample and each class, there only 3 possible vote levels
        # because they are only 3 distinct class pairs thus 3 distinct
        # binary classifiers.
        # Therefore, sorting predictions based on votes would yield
        # mostly tied predictions:
        assert set(votes[:, class_idx]).issubset(set([0.0, 1.0, 2.0]))
        # The OVO decision function on the other hand is able to resolve
        # most of the ties on this data as it combines both the vote counts
        # and the aggregated confidence levels of the binary classifiers
        # to compute the aggregate decision function. The iris dataset
        # has 150 samples with a couple of duplicates. The OvO decisions
        # can resolve most of the ties:
        assert len(np.unique(decisions[:, class_idx])) > 146
def test_ovo_gridsearch():
    """Grid search over the inner estimator's C selects one candidate value."""
    candidate_Cs = [0.1, 0.5, 0.8]
    search = GridSearchCV(
        OneVsOneClassifier(LinearSVC(random_state=0)),
        {"estimator__C": candidate_Cs},
    )
    search.fit(iris.data, iris.target)
    assert search.best_estimator_.estimators_[0].C in candidate_Cs
def test_ovo_ties():
    """Ties are broken by decision values, not by the smallest label."""
    # Test that ties are broken using the decision function,
    # not defaulting to the smallest label
    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
    y = np.array([2, 0, 1, 2])
    multi_clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4, tol=None))
    ovo_prediction = multi_clf.fit(X, y).predict(X)
    ovo_decision = multi_clf.decision_function(X)
    # Classifiers are in order 0-1, 0-2, 1-2
    # Use decision_function to compute the votes and the normalized
    # sum_of_confidences, which is used to disambiguate when there is a tie in
    # votes.
    votes = np.round(ovo_decision)
    normalized_confidences = ovo_decision - votes
    # For the first point, there is one vote per class
    assert_array_equal(votes[0, :], 1)
    # For the rest, there is no tie and the prediction is the argmax
    assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:])
    # For the tie, the prediction is the class with the highest score
    assert ovo_prediction[0] == normalized_confidences[0].argmax()
def test_ovo_ties2():
    """Tie-breaking is not biased toward the first labels."""
    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
    y_ref = np.array([2, 0, 1, 2])
    # cycle through labels so that each label wins once
    for shift in range(3):
        shifted_y = (y_ref + shift) % 3
        clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4, tol=None))
        tied_pred = clf.fit(X, shifted_y).predict(X)
        assert tied_pred[0] == shift % 3
def test_ovo_string_y():
    """OvO round-trips string class labels unchanged."""
    features = np.eye(4)
    labels = np.array(["a", "b", "c", "d"])
    model = OneVsOneClassifier(LinearSVC())
    model.fit(features, labels)
    assert_array_equal(labels, model.predict(features))
def test_ovo_one_class():
    """Fitting OvO on a single class raises a ValueError."""
    features = np.eye(4)
    single_class_labels = np.array(["a"] * 4)
    with pytest.raises(ValueError, match="when only one class"):
        OneVsOneClassifier(LinearSVC()).fit(features, single_class_labels)
def test_ovo_float_y():
    """OvO rejects continuous (float) targets."""
    continuous_target = iris.data[:, 0]
    clf = OneVsOneClassifier(LinearSVC())
    with pytest.raises(ValueError, match="Unknown label type"):
        clf.fit(iris.data, continuous_target)
def test_ecoc_exceptions():
    """Predicting with an unfitted OutputCodeClassifier raises NotFittedError."""
    unfitted = OutputCodeClassifier(LinearSVC(random_state=0))
    with pytest.raises(NotFittedError):
        unfitted.predict([])
def test_ecoc_fit_predict():
    """OutputCodeClassifier builds code_size * n_classes estimators."""
    # A classifier which implements decision_function.
    ecoc_svc = OutputCodeClassifier(
        LinearSVC(random_state=0), code_size=2, random_state=0
    )
    ecoc_svc.fit(iris.data, iris.target).predict(iris.data)
    assert len(ecoc_svc.estimators_) == n_classes * 2
    # A classifier which implements predict_proba.
    ecoc_nb = OutputCodeClassifier(MultinomialNB(), code_size=2, random_state=0)
    ecoc_nb.fit(iris.data, iris.target).predict(iris.data)
    assert len(ecoc_nb.estimators_) == n_classes * 2
def test_ecoc_gridsearch():
    """Grid search over the inner estimator's C selects one candidate value."""
    candidate_Cs = [0.1, 0.5, 0.8]
    search = GridSearchCV(
        OutputCodeClassifier(LinearSVC(random_state=0), random_state=0),
        {"estimator__C": candidate_Cs},
    )
    search.fit(iris.data, iris.target)
    assert search.best_estimator_.estimators_[0].C in candidate_Cs
def test_ecoc_float_y():
    """OutputCodeClassifier rejects continuous (float) targets."""
    continuous_target = iris.data[:, 0]
    clf = OutputCodeClassifier(LinearSVC())
    with pytest.raises(ValueError, match="Unknown label type"):
        clf.fit(iris.data, continuous_target)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_ecoc_delegate_sparse_base_estimator(csc_container):
    """Sparse-input validation is delegated to the base estimator."""
    # Non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/17218
    X, y = iris.data, iris.target
    X_sp = csc_container(X)
    # create an estimator that does not support sparse input
    base_estimator = CheckingClassifier(
        check_X=check_array,
        check_X_params={"ensure_2d": True, "accept_sparse": False},
    )
    ecoc = OutputCodeClassifier(base_estimator, random_state=0)
    with pytest.raises(TypeError, match="Sparse data was passed"):
        ecoc.fit(X_sp, y)
    ecoc.fit(X, y)
    with pytest.raises(TypeError, match="Sparse data was passed"):
        ecoc.predict(X_sp)
    # smoke test to check when sparse input should be supported
    ecoc = OutputCodeClassifier(LinearSVC(random_state=0))
    ecoc.fit(X_sp, y).predict(X_sp)
    assert len(ecoc.estimators_) == 4
def test_pairwise_indices():
    """pairwise_indices_ partitions a precomputed kernel per class pair."""
    clf_precomputed = svm.SVC(kernel="precomputed")
    X, y = iris.data, iris.target
    ovr_false = OneVsOneClassifier(clf_precomputed)
    linear_kernel = np.dot(X, X.T)
    ovr_false.fit(linear_kernel, y)
    n_estimators = len(ovr_false.estimators_)
    precomputed_indices = ovr_false.pairwise_indices_
    # iris is balanced over three classes, so each pair of classes keeps
    # 2/3 of the samples; rescaling recovers the full sample count.
    for idx in precomputed_indices:
        assert (
            idx.shape[0] * n_estimators / (n_estimators - 1) == linear_kernel.shape[0]
        )
def test_pairwise_n_features_in():
    """Check the n_features_in_ attributes of the meta and base estimators.

    When the training data is a regular design matrix, everything is intuitive.
    However, when the training data is a precomputed kernel matrix, the
    multiclass strategy can resample the kernel matrix of the underlying base
    estimator both row-wise and column-wise and this has a non-trivial impact
    on the expected value for the n_features_in_ of both the meta and the base
    estimators.
    """
    X, y = iris.data, iris.target
    # Remove the last sample to make the classes not exactly balanced and make
    # the test more interesting.
    assert y[-1] == 0
    X = X[:-1]
    y = y[:-1]
    # Fitting directly on the design matrix:
    assert X.shape == (149, 4)
    clf_notprecomputed = svm.SVC(kernel="linear").fit(X, y)
    assert clf_notprecomputed.n_features_in_ == 4
    ovr_notprecomputed = OneVsRestClassifier(clf_notprecomputed).fit(X, y)
    assert ovr_notprecomputed.n_features_in_ == 4
    for est in ovr_notprecomputed.estimators_:
        assert est.n_features_in_ == 4
    ovo_notprecomputed = OneVsOneClassifier(clf_notprecomputed).fit(X, y)
    assert ovo_notprecomputed.n_features_in_ == 4
    assert ovo_notprecomputed.n_classes_ == 3
    assert len(ovo_notprecomputed.estimators_) == 3
    for est in ovo_notprecomputed.estimators_:
        assert est.n_features_in_ == 4
    # When working with precomputed kernels we have one "feature" per training
    # sample:
    K = X @ X.T
    assert K.shape == (149, 149)
    clf_precomputed = svm.SVC(kernel="precomputed").fit(K, y)
    assert clf_precomputed.n_features_in_ == 149
    ovr_precomputed = OneVsRestClassifier(clf_precomputed).fit(K, y)
    assert ovr_precomputed.n_features_in_ == 149
    assert ovr_precomputed.n_classes_ == 3
    assert len(ovr_precomputed.estimators_) == 3
    for est in ovr_precomputed.estimators_:
        assert est.n_features_in_ == 149
    # This becomes really interesting with OvO and precomputed kernel together:
    # internally, OvO will drop the samples of the classes not part of the pair
    # of classes under consideration for a given binary classifier. Since we
    # use a precomputed kernel, it will also drop the matching columns of the
    # kernel matrix, and therefore we have fewer "features" as result.
    #
    # Since class 0 has 49 samples, and class 1 and 2 have 50 samples each, a
    # single OvO binary classifier works with a sub-kernel matrix of shape
    # either (99, 99) or (100, 100).
    ovo_precomputed = OneVsOneClassifier(clf_precomputed).fit(K, y)
    assert ovo_precomputed.n_features_in_ == 149
    # BUGFIX(review): the next two assertions previously re-checked
    # `ovr_precomputed` (a copy-paste slip); they are meant to verify the OvO
    # meta-estimator fitted just above.
    assert ovo_precomputed.n_classes_ == 3
    assert len(ovo_precomputed.estimators_) == 3
    assert ovo_precomputed.estimators_[0].n_features_in_ == 99  # class 0 vs class 1
    assert ovo_precomputed.estimators_[1].n_features_in_ == 99  # class 0 vs class 2
    assert ovo_precomputed.estimators_[2].n_features_in_ == 100  # class 1 vs class 2
@pytest.mark.parametrize(
    "MultiClassClassifier", [OneVsRestClassifier, OneVsOneClassifier]
)
def test_pairwise_tag(MultiClassClassifier):
    """The meta-estimator's pairwise tag mirrors its base estimator's."""
    wrapper_plain = MultiClassClassifier(svm.SVC())
    assert not wrapper_plain._get_tags()["pairwise"]
    wrapper_kernel = MultiClassClassifier(svm.SVC(kernel="precomputed"))
    assert wrapper_kernel._get_tags()["pairwise"]
@pytest.mark.parametrize(
    "MultiClassClassifier", [OneVsRestClassifier, OneVsOneClassifier]
)
def test_pairwise_cross_val_score(MultiClassClassifier):
    """CV scores with a precomputed kernel match the linear-kernel scores."""
    clf_precomputed = svm.SVC(kernel="precomputed")
    clf_notprecomputed = svm.SVC(kernel="linear")
    X, y = iris.data, iris.target
    multiclass_clf_notprecomputed = MultiClassClassifier(clf_notprecomputed)
    multiclass_clf_precomputed = MultiClassClassifier(clf_precomputed)
    # A precomputed linear kernel must be equivalent to kernel="linear".
    linear_kernel = np.dot(X, X.T)
    score_not_precomputed = cross_val_score(
        multiclass_clf_notprecomputed, X, y, error_score="raise"
    )
    score_precomputed = cross_val_score(
        multiclass_clf_precomputed, linear_kernel, y, error_score="raise"
    )
    assert_array_equal(score_precomputed, score_not_precomputed)
@pytest.mark.parametrize(
    "MultiClassClassifier", [OneVsRestClassifier, OneVsOneClassifier]
)
# FIXME: we should move this test in `estimator_checks` once we are able
# to construct meta-estimator instances
def test_support_missing_values(MultiClassClassifier):
    """Smoke test: OvR/OvO delegate NaN validation to the inner pipeline."""
    rng = np.random.RandomState(42)
    # Copy so the shared iris fixture is not modified in place.
    X = np.copy(iris.data)
    y = iris.target
    nan_mask = rng.choice([1, 0], X.shape, p=[0.1, 0.9]).astype(bool)
    X[nan_mask] = np.nan
    inner = make_pipeline(SimpleImputer(), LogisticRegression(random_state=rng))
    MultiClassClassifier(inner).fit(X, y).score(X, y)
@pytest.mark.parametrize("make_y", [np.ones, np.zeros])
def test_constant_int_target(make_y):
    """Fitting on a constant integer target must not raise.

    Non-regression test for #21869
    """
    X = np.ones((10, 2))
    constant_y = make_y((10, 1), dtype=np.int32)
    model = OneVsRestClassifier(LogisticRegression())
    model.fit(X, constant_y)
    proba = model.predict_proba(X)
    # All probability mass must land on the single observed class.
    wanted = np.zeros((X.shape[0], 2))
    wanted[:, 0] = 1
    assert_allclose(proba, wanted)
def test_ovo_consistent_binary_classification():
    """OvO over a binary problem matches the base binary classifier.

    Non-regression test for #13617.
    """
    X, y = load_breast_cancer(return_X_y=True)
    base = KNeighborsClassifier(n_neighbors=8, weights="distance")
    wrapped = OneVsOneClassifier(base)
    base.fit(X, y)
    wrapped.fit(X, y)
    assert_array_equal(base.predict(X), wrapped.predict(X))
def test_multiclass_estimator_attribute_error():
    """Check that we raise the proper AttributeError when the final estimator
    does not implement the `partial_fit` method, which is decorated with
    `available_if`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28108
    """
    iris = datasets.load_iris()
    # LogisticRegression does not implement 'partial_fit' and should raise an
    # AttributeError
    clf = OneVsRestClassifier(estimator=LogisticRegression(random_state=42))
    outer_msg = "This 'OneVsRestClassifier' has no attribute 'partial_fit'"
    inner_msg = "'LogisticRegression' object has no attribute 'partial_fit'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        clf.partial_fit(iris.data, iris.target)
    # The outer error must chain from the inner estimator's AttributeError.
    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner_msg in str(exec_info.value.__cause__)
@@ -0,0 +1,867 @@
import re
import numpy as np
import pytest
from joblib import cpu_count
from sklearn import datasets
from sklearn.base import ClassifierMixin, clone
from sklearn.datasets import (
load_linnerud,
make_classification,
make_multilabel_classification,
make_regression,
)
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import (
GradientBoostingRegressor,
RandomForestClassifier,
StackingRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.impute import SimpleImputer
from sklearn.linear_model import (
Lasso,
LinearRegression,
LogisticRegression,
OrthogonalMatchingPursuit,
PassiveAggressiveClassifier,
Ridge,
SGDClassifier,
SGDRegressor,
)
from sklearn.metrics import jaccard_score, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import (
ClassifierChain,
MultiOutputClassifier,
MultiOutputRegressor,
RegressorChain,
)
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import (
BSR_CONTAINERS,
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
def test_multi_target_regression():
    """MultiOutputRegressor must match per-column independently fitted models."""
    X, y = datasets.make_regression(n_targets=3, random_state=0)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]
    # Reference: one GradientBoostingRegressor fitted per output column.
    references = np.column_stack(
        [
            GradientBoostingRegressor(random_state=0)
            .fit(X_train, y_train[:, col])
            .predict(X_test)
            for col in range(3)
        ]
    )
    multi = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    multi.fit(X_train, y_train)
    assert_almost_equal(references, multi.predict(X_test))
def test_multi_target_regression_partial_fit():
    """partial_fit on MultiOutputRegressor must match per-column partial_fit."""
    X, y = datasets.make_regression(n_targets=3, random_state=0)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]
    half = 25
    references = np.zeros_like(y_test)
    for col in range(3):
        single = SGDRegressor(random_state=0, max_iter=5)
        single.partial_fit(X_train[:half], y_train[:half, col])
        single.partial_fit(X_train[half:], y_train[half:, col])
        references[:, col] = single.predict(X_test)
    multi = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    multi.partial_fit(X_train[:half], y_train[:half])
    multi.partial_fit(X_train[half:], y_train[half:])
    assert_almost_equal(references, multi.predict(X_test))
    # A base estimator without partial_fit must not expose it on the wrapper.
    assert not hasattr(MultiOutputRegressor(Lasso), "partial_fit")
def test_multi_target_regression_one_target():
    """Fitting MultiOutputRegressor on a 1-D target must raise ValueError."""
    X, y = datasets.make_regression(n_targets=1, random_state=0)
    estimator = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    with pytest.raises(ValueError, match="at least two dimensions"):
        estimator.fit(X, y)
@pytest.mark.parametrize(
    "sparse_container",
    CSR_CONTAINERS
    + CSC_CONTAINERS
    + COO_CONTAINERS
    + LIL_CONTAINERS
    + DOK_CONTAINERS
    + BSR_CONTAINERS,
)
def test_multi_target_sparse_regression(sparse_container):
    """Dense and sparse inputs must yield the same multi-target predictions."""
    X, y = datasets.make_regression(n_targets=3, random_state=0)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]
    dense_model = MultiOutputRegressor(Lasso(random_state=0)).fit(X_train, y_train)
    sparse_model = MultiOutputRegressor(Lasso(random_state=0)).fit(
        sparse_container(X_train), y_train
    )
    assert_almost_equal(
        dense_model.predict(X_test), sparse_model.predict(sparse_container(X_test))
    )
def test_multi_target_sample_weights_api():
    """sample_weight is rejected iff the base estimator does not support it."""
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [0.8, 0.6]
    unsupported = MultiOutputRegressor(OrthogonalMatchingPursuit())
    with pytest.raises(ValueError, match="does not support sample weights"):
        unsupported.fit(X, y, w)
    # no exception should be raised if the base estimator supports weights
    MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X, y, w)
def test_multi_target_sample_weight_partial_fit():
    """Different sample weights must produce different partial_fit results."""
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    # weighted regressor
    weighted = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    weighted.partial_fit(X, y, [2.0, 1.0])
    # same regressor with different weights
    other = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    other.partial_fit(X, y, [2.0, 2.0])
    assert other.predict(X)[0][0] != weighted.predict(X)[0][0]
def test_multi_target_sample_weights():
    """Weighting a sample twice must equal duplicating it once."""
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    weighted = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    weighted.fit(Xw, yw, [2.0, 1.0])
    # unweighted fit on data where the first sample appears twice
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    repeated = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    repeated.fit(X, y)
    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(repeated.predict(X_test), weighted.predict(X_test))
# Shared module-level fixture: a 3-output multiclass problem built from iris.
iris = datasets.load_iris()
# Create multiple targets by randomly shuffling copies of the iris target and
# stacking them column-wise; the seeds are fixed so every test sees the same
# data.
X = iris.data
y1 = iris.target
y2 = shuffle(y1, random_state=1)
y3 = shuffle(y1, random_state=2)
y = np.column_stack((y1, y2, y3))
n_samples, n_features = X.shape
n_outputs = y.shape[1]  # 3 output columns
n_classes = len(np.unique(y1))  # 3 iris classes per output
classes = list(map(np.unique, (y1, y2, y3)))  # per-output class labels
def test_multi_output_classification_partial_fit_parallelism():
    """With n_jobs > 1, partial_fit must rebuild (not mutate) sub-estimators."""
    base = SGDClassifier(loss="log_loss", random_state=1, max_iter=5)
    clf = MultiOutputClassifier(base, n_jobs=4)
    clf.partial_fit(X, y, classes)
    first = clf.estimators_[0]
    clf.partial_fit(X, y)
    second = clf.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert first is not second
# check multioutput has predict_proba
def test_hasattr_multi_output_predict_proba():
    """predict_proba is exposed iff the fitted base estimator exposes it."""
    # loss='hinge' (the default) provides no predict_proba
    no_proba = MultiOutputClassifier(SGDClassifier(random_state=1, max_iter=5))
    no_proba.fit(X, y)
    assert not hasattr(no_proba, "predict_proba")
    # loss='log_loss' is probabilistic, so predict_proba must exist
    with_proba = MultiOutputClassifier(
        SGDClassifier(loss="log_loss", random_state=1, max_iter=5)
    )
    with_proba.fit(X, y)
    assert hasattr(with_proba, "predict_proba")
# check predict_proba passes
def test_multi_output_predict_proba():
    """predict_proba must delegate through a GridSearchCV wrapper, and a base
    estimator without predict_proba must raise a chained AttributeError."""
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5)
    param = {"loss": ("hinge", "log_loss", "modified_huber")}
    # inner function for custom scoring
    def custom_scorer(estimator, X, y):
        # Reward estimators exposing predict_proba so the grid search selects
        # a probabilistic loss.
        if hasattr(estimator, "predict_proba"):
            return 1.0
        else:
            return 0.0
    grid_clf = GridSearchCV(
        sgd_linear_clf,
        param_grid=param,
        scoring=custom_scorer,
        cv=3,
        error_score="raise",
    )
    # Best estimator found by the grid search is probabilistic, so this
    # must not raise.
    multi_target_linear = MultiOutputClassifier(grid_clf)
    multi_target_linear.fit(X, y)
    multi_target_linear.predict_proba(X)
    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
    # loss function; therefore it does not expose a predict_proba method
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    multi_target_linear.fit(X, y)
    inner2_msg = "probability estimates are not available for loss='hinge'"
    inner1_msg = "'SGDClassifier' has no attribute 'predict_proba'"
    outer_msg = "'MultiOutputClassifier' has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        multi_target_linear.predict_proba(X)
    # The wrapper's AttributeError must chain the base estimator's error,
    # which itself chains the loss-specific message.
    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner1_msg in str(exec_info.value.__cause__)
    assert isinstance(exec_info.value.__cause__.__cause__, AttributeError)
    assert inner2_msg in str(exec_info.value.__cause__.__cause__)
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict
    sgd_linear_clf = SGDClassifier(loss="log_loss", random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    # The first partial_fit call must receive the full per-output class lists.
    multi_target_linear.partial_fit(X[:half_index], y[:half_index], classes=classes)
    first_predictions = multi_target_linear.predict(X)
    assert (n_samples, n_outputs) == first_predictions.shape
    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert (n_samples, n_outputs) == second_predictions.shape
    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i]
        )
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
def test_multi_output_classification_partial_fit_no_first_classes_exception():
    """First partial_fit call without `classes` must raise ValueError."""
    clf = MultiOutputClassifier(
        SGDClassifier(loss="log_loss", random_state=1, max_iter=5)
    )
    expected = "classes must be passed on the first call to partial_fit."
    with pytest.raises(ValueError, match=expected):
        clf.partial_fit(X, y)
def test_multi_output_classification():
    """fit/predict/predict_proba/score of MultiOutputClassifier must match
    per-column clones of the base forest."""
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi = MultiOutputClassifier(forest)
    multi.fit(X, y)
    predictions = multi.predict(X)
    assert (n_samples, n_outputs) == predictions.shape
    proba = multi.predict_proba(X)
    assert len(proba) == n_outputs
    for per_output in proba:
        assert (n_samples, n_classes) == per_output.shape
    # argmax over the stacked per-output probabilities must reproduce predict
    assert_array_equal(np.argmax(np.dstack(proba), axis=1), predictions)
    # Each output column must match an independently fitted clone.
    for col in range(3):
        single = clone(forest)
        single.fit(X, y[:, col])
        assert list(single.predict(X)) == list(predictions[:, col])
        assert_array_equal(list(single.predict_proba(X)), list(proba[col]))
def test_multiclass_multioutput_estimator():
    """MultiOutputClassifier over OneVsRestClassifier (meta over meta)."""
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    multi = MultiOutputClassifier(ovr)
    multi.fit(X, y)
    predictions = multi.predict(X)
    assert (n_samples, n_outputs) == predictions.shape
    # Each output column must match an independently fitted OvR clone.
    for col in range(3):
        single = clone(ovr)
        single.fit(X, y[:, col])
        assert list(single.predict(X)) == list(predictions[:, col])
def test_multiclass_multioutput_estimator_predict_proba():
    """predict_proba on a 2-output problem (one binary, one 3-class output)
    must return one probability array per output with pinned values."""
    seed = 542
    # make test deterministic
    rng = np.random.RandomState(seed)
    # random features
    X = rng.normal(size=(5, 5))
    # random labels
    y1 = np.array(["b", "a", "a", "b", "a"]).reshape(5, 1)  # 2 classes
    y2 = np.array(["d", "e", "f", "e", "d"]).reshape(5, 1)  # 3 classes
    Y = np.concatenate([y1, y2], axis=1)
    clf = MultiOutputClassifier(
        LogisticRegression(solver="liblinear", random_state=seed)
    )
    clf.fit(X, Y)
    y_result = clf.predict_proba(X)
    # Reference probabilities obtained from a previous run with the same
    # seed; any drift in the underlying solver would show up here.
    y_actual = [
        np.array(
            [
                [0.23481764, 0.76518236],
                [0.67196072, 0.32803928],
                [0.54681448, 0.45318552],
                [0.34883923, 0.65116077],
                [0.73687069, 0.26312931],
            ]
        ),
        np.array(
            [
                [0.5171785, 0.23878628, 0.24403522],
                [0.22141451, 0.64102704, 0.13755846],
                [0.16751315, 0.18256843, 0.64991843],
                [0.27357372, 0.55201592, 0.17441036],
                [0.65745193, 0.26062899, 0.08191907],
            ]
        ),
    ]
    for i in range(len(y_actual)):
        assert_almost_equal(y_result[i], y_actual[i])
def test_multi_output_classification_sample_weights():
    """Doubling a sample's weight must equal duplicating the sample."""
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3, 2], [2, 3]]
    weighted = MultiOutputClassifier(
        RandomForestClassifier(n_estimators=10, random_state=1)
    )
    weighted.fit(Xw, yw, np.asarray([2.0, 1.0]))
    # unweighted, but with the first sample repeated
    X_rep = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y_rep = [[3, 2], [3, 2], [2, 3]]
    repeated = MultiOutputClassifier(
        RandomForestClassifier(n_estimators=10, random_state=1)
    )
    repeated.fit(X_rep, y_rep)
    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(repeated.predict(X_test), weighted.predict(X_test))
def test_multi_output_classification_partial_fit_sample_weights():
    """Sample weights must be equivalent to sample duplication."""
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    weighted = MultiOutputClassifier(SGDClassifier(random_state=1, max_iter=20))
    weighted.fit(Xw, yw, np.asarray([2.0, 1.0, 1.0]))
    # unweighted, but with the first sample repeated
    X_rep = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y_rep = [[3, 2], [3, 2], [2, 3], [3, 2]]
    repeated = MultiOutputClassifier(SGDClassifier(random_state=1, max_iter=20))
    repeated.fit(X_rep, y_rep)
    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(repeated.predict(X_test), weighted.predict(X_test))
def test_multi_output_exceptions():
    """Unfitted use, output-count mismatch and continuous targets must raise."""
    moc = MultiOutputClassifier(LinearSVC(random_state=0))
    # score before fit -> NotFittedError
    with pytest.raises(NotFittedError):
        moc.score(X, y)
    # score with fewer outputs than fit -> ValueError
    y_new = np.column_stack((y1, y2))
    moc.fit(X, y)
    with pytest.raises(ValueError):
        moc.score(X, y_new)
    # continuous y -> ValueError
    with pytest.raises(ValueError, match="Unknown label type"):
        moc.fit(X, X[:, 1])
@pytest.mark.parametrize("response_method", ["predict_proba", "predict"])
def test_multi_output_not_fitted_error(response_method):
    """Calling predict/predict_proba before fit must raise NotFittedError."""
    unfitted = MultiOutputClassifier(LogisticRegression())
    with pytest.raises(NotFittedError):
        getattr(unfitted, response_method)(X)
def test_multi_output_delegate_predict_proba():
    """predict_proba availability must mirror the underlying estimator, both
    before and after fitting, with a chained AttributeError otherwise."""
    outer_msg = "'MultiOutputClassifier' has no attribute 'predict_proba'"
    inner_msg = "'LinearSVC' object has no attribute 'predict_proba'"

    def assert_no_predict_proba(estimator):
        # The wrapper's error must chain the base estimator's own error.
        assert not hasattr(estimator, "predict_proba")
        with pytest.raises(AttributeError, match=outer_msg) as exec_info:
            estimator.predict_proba(X)
        assert isinstance(exec_info.value.__cause__, AttributeError)
        assert inner_msg == str(exec_info.value.__cause__)

    # A base estimator with `predict_proba` exposes it even before fit.
    moc = MultiOutputClassifier(LogisticRegression())
    assert hasattr(moc, "predict_proba")
    moc.fit(X, y)
    assert hasattr(moc, "predict_proba")
    # A base estimator without `predict_proba` raises, before and after fit.
    moc = MultiOutputClassifier(LinearSVC())
    assert_no_predict_proba(moc)
    moc.fit(X, y)
    assert_no_predict_proba(moc)
def generate_multilabel_dataset_with_correlations():
    # Generate a multilabel dataset from a multiclass dataset by encoding each
    # sample's integer class number in binary, so the resulting label columns
    # are strongly correlated with one another.
    X, y = make_classification(
        n_samples=1000, n_features=100, n_classes=16, n_informative=10, random_state=0
    )
    # format(yy, "#06b")[2:] yields the 4-bit binary representation of yy.
    Y_multi = np.array([[int(yyy) for yyy in format(yy, "#06b")[2:]] for yy in y])
    return X, Y_multi
@pytest.mark.parametrize("chain_method", ["predict", "decision_function"])
def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method):
    """ClassifierChain over LinearSVC: predictions must match thresholded
    decision values, and predict_proba must be absent."""
    X, Y = generate_multilabel_dataset_with_correlations()
    chain = ClassifierChain(
        LinearSVC(),
        chain_method=chain_method,
    ).fit(X, Y)
    Y_pred = chain.predict(X)
    assert Y_pred.shape == Y.shape
    # decision_function >= 0 must agree with the hard predictions
    assert_array_equal(chain.decision_function(X) >= 0, Y_pred)
    # LinearSVC is not probabilistic, so the chain exposes no predict_proba
    assert not hasattr(chain, "predict_proba")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_classifier_chain_fit_and_predict_with_sparse_data(csr_container):
    """Sparse and dense inputs must produce identical chain predictions."""
    X, Y = generate_multilabel_dataset_with_correlations()
    X_sparse = csr_container(X)
    pred_sparse = (
        ClassifierChain(LogisticRegression()).fit(X_sparse, Y).predict(X_sparse)
    )
    pred_dense = ClassifierChain(LogisticRegression()).fit(X, Y).predict(X)
    assert_array_equal(pred_sparse, pred_dense)
def test_classifier_chain_vs_independent_models():
    """On correlated labels, a classifier chain must beat independent
    one-vs-rest models on the sample-wise Jaccard score."""
    X, Y = generate_multilabel_dataset_with_correlations()
    X_train, X_test = X[:600, :], X[600:, :]
    Y_train, Y_test = Y[:600, :], Y[600:, :]
    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    score_ovr = jaccard_score(Y_test, ovr.predict(X_test), average="samples")
    chain = ClassifierChain(LogisticRegression())
    chain.fit(X_train, Y_train)
    score_chain = jaccard_score(Y_test, chain.predict(X_test), average="samples")
    assert score_chain > score_ovr
@pytest.mark.parametrize(
    "chain_method",
    ["predict", "predict_proba", "predict_log_proba", "decision_function"],
)
@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])
def test_classifier_chain_fit_and_predict(chain_method, response_method):
    """Chain predictions must agree with thresholded probabilities, and each
    link must be trained on the features plus all previous outputs."""
    X, Y = generate_multilabel_dataset_with_correlations()
    chain = ClassifierChain(LogisticRegression(), chain_method=chain_method)
    chain.fit(X, Y)
    Y_pred = chain.predict(X)
    assert Y_pred.shape == Y.shape
    # Link i sees the n_features original features plus i previous labels.
    expected_sizes = list(range(X.shape[1], X.shape[1] + Y.shape[1]))
    assert [est.coef_.size for est in chain.estimators_] == expected_sizes
    Y_prob = getattr(chain, response_method)(X)
    if response_method == "predict_log_proba":
        Y_prob = np.exp(Y_prob)
    assert_array_equal(Y_prob >= 0.5, Y_pred)
    assert isinstance(chain, ClassifierMixin)
def test_regressor_chain_fit_and_predict():
    """RegressorChain output shape and per-link coefficient sizes."""
    X, Y = generate_multilabel_dataset_with_correlations()
    chain = RegressorChain(Ridge())
    chain.fit(X, Y)
    assert chain.predict(X).shape == Y.shape
    # Link i sees the original features plus i previous targets.
    expected_sizes = list(range(X.shape[1], X.shape[1] + Y.shape[1]))
    assert [est.coef_.size for est in chain.estimators_] == expected_sizes
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_base_chain_fit_and_predict_with_sparse_data_and_cv(csr_container):
    """Chains using internal cross_val_predict must handle sparse input."""
    X, Y = generate_multilabel_dataset_with_correlations()
    X_sparse = csr_container(X)
    for chain in (
        ClassifierChain(LogisticRegression(), cv=3),
        RegressorChain(Ridge(), cv=3),
    ):
        chain.fit(X_sparse, Y)
        assert chain.predict(X_sparse).shape == Y.shape
def test_base_chain_random_order():
    """Fit base chains with a random label order.

    A chain fitted with ``order="random"`` must expose the drawn permutation
    in ``order_``, and refitting with that explicit order must reproduce the
    same sub-estimators.
    """
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()), RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order="random", random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        # Bug fix: compare the fitted permutation ``order_``, not the
        # ``order`` constructor parameter — that is the string "random", so
        # the original assertion was vacuously true.
        assert list(chain_random.order_) != list(range(4))
        assert len(chain_random.order_) == 4
        assert len(set(chain_random.order_)) == 4
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_, chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_)
@pytest.mark.parametrize(
    "chain_type, chain_method",
    [
        ("classifier", "predict"),
        ("classifier", "predict_proba"),
        ("classifier", "predict_log_proba"),
        ("classifier", "decision_function"),
        ("regressor", ""),
    ],
)
def test_base_chain_crossval_fit_and_predict(chain_type, chain_method):
    """A chain fitted with internal CV must differ from, yet stay close to,
    the same chain fitted without CV."""
    X, Y = generate_multilabel_dataset_with_correlations()
    chain = (
        ClassifierChain(LogisticRegression(), chain_method=chain_method)
        if chain_type == "classifier"
        else RegressorChain(Ridge())
    )
    chain.fit(X, Y)
    chain_cv = clone(chain).set_params(cv=3)
    chain_cv.fit(X, Y)
    Y_pred_cv = chain_cv.predict(X)
    Y_pred = chain.predict(X)
    assert Y_pred_cv.shape == Y_pred.shape
    # CV changes the features fed to downstream links, so predictions differ.
    assert not np.all(Y_pred == Y_pred_cv)
    if isinstance(chain, ClassifierChain):
        assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4
    else:
        assert mean_squared_error(Y, Y_pred_cv) < 0.25
@pytest.mark.parametrize(
    "estimator",
    [
        RandomForestClassifier(n_estimators=2),
        MultiOutputClassifier(RandomForestClassifier(n_estimators=2)),
        ClassifierChain(RandomForestClassifier(n_estimators=2)),
    ],
)
def test_multi_output_classes_(estimator):
    """classes_ must be a per-output list of the observed class labels."""
    # RandomForestClassifier supports multioutput out-of-the-box
    estimator.fit(X, y)
    assert isinstance(estimator.classes_, list)
    assert len(estimator.classes_) == n_outputs
    for expected_classes, observed_classes in zip(classes, estimator.classes_):
        assert_array_equal(expected_classes, observed_classes)
class DummyRegressorWithFitParams(DummyRegressor):
    # Test double: records any extra fit kwargs so tests can assert that
    # metadata was forwarded by the meta-estimator.
    def fit(self, X, y, sample_weight=None, **fit_params):
        # Stash the routed params for later inspection by the test.
        self._fit_params = fit_params
        return super().fit(X, y, sample_weight)
class DummyClassifierWithFitParams(DummyClassifier):
    # Test double: records any extra fit kwargs so tests can assert that
    # metadata was forwarded by the meta-estimator.
    def fit(self, X, y, sample_weight=None, **fit_params):
        # Stash the routed params for later inspection by the test.
        self._fit_params = fit_params
        return super().fit(X, y, sample_weight)
@pytest.mark.filterwarnings("ignore:`n_features_in_` is deprecated")
@pytest.mark.parametrize(
    "estimator, dataset",
    [
        (
            MultiOutputClassifier(DummyClassifierWithFitParams(strategy="prior")),
            datasets.make_multilabel_classification(),
        ),
        (
            MultiOutputRegressor(DummyRegressorWithFitParams()),
            datasets.make_regression(n_targets=3, random_state=0),
        ),
    ],
)
def test_multioutput_estimator_with_fit_params(estimator, dataset):
    """Extra fit kwargs must be forwarded to every sub-estimator."""
    X, y = dataset
    some_param = np.zeros_like(X)
    estimator.fit(X, y, some_param=some_param)
    # Each per-output sub-estimator must have received the extra kwarg.
    for dummy_estimator in estimator.estimators_:
        assert "some_param" in dummy_estimator._fit_params
def test_regressor_chain_w_fit_params():
    # Make sure fit_params are properly propagated to the sub-estimators
    rng = np.random.RandomState(0)
    X, y = datasets.make_regression(n_targets=3, random_state=0)
    weight = rng.rand(y.shape[0])
    # Test double that records the sample_weight it was fitted with.
    class MySGD(SGDRegressor):
        def fit(self, X, y, **fit_params):
            self.sample_weight_ = fit_params["sample_weight"]
            super().fit(X, y, **fit_params)
    model = RegressorChain(MySGD())
    # Fitting with params
    fit_param = {"sample_weight": weight}
    model.fit(X, y, **fit_param)
    # Every chain link must have received the very same weight array.
    for est in model.estimators_:
        assert est.sample_weight_ is weight
@pytest.mark.parametrize(
    "MultiOutputEstimator, Estimator",
    [(MultiOutputClassifier, LogisticRegression), (MultiOutputRegressor, Ridge)],
)
# FIXME: we should move this test in `estimator_checks` once we are able
# to construct meta-estimator instances
def test_support_missing_values(MultiOutputEstimator, Estimator):
    """Smoke test: NaN validation must be delegated to the inner pipeline."""
    rng = np.random.RandomState(42)
    X, y = rng.randn(50, 2), rng.binomial(1, 0.5, (50, 3))
    # Knock out ~1% of the entries so the imputer has work to do.
    nan_mask = rng.choice([1, 0], X.shape, p=[0.01, 0.99]).astype(bool)
    X[nan_mask] = np.nan
    pipeline = make_pipeline(SimpleImputer(), Estimator())
    MultiOutputEstimator(pipeline).fit(X, y).score(X, y)
@pytest.mark.parametrize("order_type", [list, np.array, tuple])
def test_classifier_chain_tuple_order(order_type):
    """The chain order may be given as a list, an array or a tuple."""
    X = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [2, 3], [3, 2]]
    chain = ClassifierChain(
        RandomForestClassifier(n_estimators=2, random_state=0),
        order=order_type([1, 0]),
    )
    chain.fit(X, y)
    assert_array_almost_equal(chain.predict([[1.5, 2.5, 3.5]]), [[3, 2]])
def test_classifier_chain_tuple_invalid_order():
    """An order referencing a non-existent label index must raise."""
    X = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [2, 3], [3, 2]]
    # index 2 is out of range for a 2-label problem
    chain = ClassifierChain(RandomForestClassifier(), order=(1, 2))
    with pytest.raises(ValueError, match="invalid order"):
        chain.fit(X, y)
def test_classifier_chain_verbose(capsys):
    """verbose=True must log one progress line per chain link."""
    X, y = make_multilabel_classification(
        n_samples=100, n_features=5, n_classes=3, n_labels=3, random_state=0
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    chain = ClassifierChain(
        DecisionTreeClassifier(),
        order=[0, 1, 2],
        random_state=0,
        verbose=True,
    )
    chain.fit(X_train, y_train)
    # One "[Chain]" line per link, in the fixed order 0, 1, 2.
    expected = (
        r"\[Chain\].*\(1 of 3\) Processing order 0, total=.*\n"
        r"\[Chain\].*\(2 of 3\) Processing order 1, total=.*\n"
        r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$"
    )
    assert re.match(expected, capsys.readouterr()[0])
def test_regressor_chain_verbose(capsys):
    """verbose=True must log links in the user-specified order."""
    X, y = make_regression(n_samples=125, n_targets=3, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    chain = RegressorChain(
        LinearRegression(),
        order=[1, 0, 2],
        random_state=0,
        verbose=True,
    )
    chain.fit(X_train, y_train)
    # Log lines must follow the chain order 1, 0, 2.
    expected = (
        r"\[Chain\].*\(1 of 3\) Processing order 1, total=.*\n"
        r"\[Chain\].*\(2 of 3\) Processing order 0, total=.*\n"
        r"\[Chain\].*\(3 of 3\) Processing order 2, total=.*\n$"
    )
    assert re.match(expected, capsys.readouterr()[0])
def test_multioutputregressor_ducktypes_fitted_estimator():
    """predict must duck-type the fitted sub-estimators.

    Non-regression test for #16549.
    """
    X, y = load_linnerud(return_X_y=True)
    stacker = StackingRegressor(
        estimators=[("sgd", SGDRegressor(random_state=1))],
        final_estimator=Ridge(),
        cv=2,
    )
    regressor = MultiOutputRegressor(estimator=stacker).fit(X, y)
    regressor.predict(X)  # must not raise
@pytest.mark.parametrize(
    "Cls, method", [(ClassifierChain, "fit"), (MultiOutputClassifier, "partial_fit")]
)
def test_fit_params_no_routing(Cls, method):
    """Metadata not requested by the base classifier must be rejected."""
    X, y = make_classification(n_samples=50)
    estimator = Cls(PassiveAggressiveClassifier())
    with pytest.raises(ValueError, match="is only supported if"):
        getattr(estimator, method)(X, y, test=1)
def test_multioutput_regressor_has_partial_fit():
    """available_if must hide partial_fit on an unfitted wrapper whose base
    estimator lacks the method."""
    est = MultiOutputRegressor(LinearRegression())
    expected = "This 'MultiOutputRegressor' has no attribute 'partial_fit'"
    with pytest.raises(AttributeError, match=expected):
        getattr(est, "partial_fit")
@@ -0,0 +1,973 @@
import re
import warnings
import numpy as np
import pytest
from scipy.special import logsumexp
from sklearn.datasets import load_digits, load_iris
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import (
BernoulliNB,
CategoricalNB,
ComplementNB,
GaussianNB,
MultinomialNB,
)
from sklearn.utils._testing import (
assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# All discrete (count-based) naive Bayes variants under test.
DISCRETE_NAIVE_BAYES_CLASSES = [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB]
ALL_NAIVE_BAYES_CLASSES = DISCRETE_NAIVE_BAYES_CLASSES + [GaussianNB]
# Silence the pending `force_alpha` default-change warning for this module.
msg = "The default value for `force_alpha` will change"
pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")
# Data is just 6 separable points in the plane
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
y = np.array([1, 1, 1, 2, 2, 2])
def get_random_normal_x_binary_y(global_random_seed):
    """Draw a 10x3 standard-normal feature matrix and a binary target."""
    # Sampling order matters for reproducibility: features first, then target.
    rng = np.random.RandomState(global_random_seed)
    features = rng.normal(size=(10, 3))
    labels = np.where(rng.normal(size=10) > 0, 1, 0)
    return features, labels
def get_random_integer_x_three_classes_y(global_random_seed):
    """Draw 6 random integer points in 100-D plus fixed three-class labels."""
    rng = np.random.RandomState(global_random_seed)
    features = rng.randint(5, size=(6, 100))
    labels = np.array([1, 1, 2, 2, 3, 3])
    return features, labels
def test_gnb():
    """GaussianNB fit/predict on a toy separable dataset, log-proba
    consistency, and the unknown-label error in partial_fit."""
    clf = GaussianNB()
    assert_array_equal(clf.fit(X, y).predict(X), y)
    proba = clf.predict_proba(X)
    log_proba = clf.predict_log_proba(X)
    # predict_log_proba must be the log of predict_proba
    assert_array_almost_equal(np.log(proba), log_proba, 8)
    # Labels present in y but absent from `classes` must raise.
    # FIXME Remove this test once the more general partial_fit tests are merged
    with pytest.raises(
        ValueError, match="The target label.* in y do not exist in the initial classes"
    ):
        GaussianNB().partial_fit(X, y, classes=[0, 1])
def test_gnb_prior(global_random_seed):
    """Class priors must reflect class frequencies and sum to one."""
    clf = GaussianNB().fit(X, y)
    # Balanced toy data: both priors equal 3/6.
    assert_array_almost_equal(np.array([3, 3]) / 6.0, clf.class_prior_, 8)
    X1, y1 = get_random_normal_x_binary_y(global_random_seed)
    clf = GaussianNB().fit(X1, y1)
    # Check that the class priors sum to 1
    assert_array_almost_equal(clf.class_prior_.sum(), 1)
def test_gnb_sample_weight(global_random_seed):
    """Test whether sample weights are properly used in GNB."""
    # Sample weights all being 1 should not change results
    sw = np.ones(6)
    clf = GaussianNB().fit(X, y)
    clf_sw = GaussianNB().fit(X, y, sw)
    assert_array_almost_equal(clf.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf.var_, clf_sw.var_)
    # Fitting twice with half sample-weights should result
    # in same result as fitting once with full weights
    rng = np.random.RandomState(global_random_seed)
    sw = rng.rand(y.shape[0])
    clf1 = GaussianNB().fit(X, y, sample_weight=sw)
    clf2 = GaussianNB().partial_fit(X, y, classes=[1, 2], sample_weight=sw / 2)
    clf2.partial_fit(X, y, sample_weight=sw / 2)
    assert_array_almost_equal(clf1.theta_, clf2.theta_)
    assert_array_almost_equal(clf1.var_, clf2.var_)
    # Check that duplicate entries and correspondingly increased sample
    # weights yield the same result
    ind = rng.randint(0, X.shape[0], 20)
    # bincount turns the resampling indices into per-sample multiplicities
    sample_weight = np.bincount(ind, minlength=X.shape[0])
    clf_dupl = GaussianNB().fit(X[ind], y[ind])
    clf_sw = GaussianNB().fit(X, y, sample_weight)
    assert_array_almost_equal(clf_dupl.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf_dupl.var_, clf_sw.var_)
    # non-regression test for gh-24140 where a division by zero was
    # occurring when a single class was present
    sample_weight = (y == 1).astype(np.float64)
    clf = GaussianNB().fit(X, y, sample_weight=sample_weight)
def test_gnb_neg_priors():
    """Negative class priors must be rejected at fit time."""
    clf = GaussianNB(priors=np.array([-1.0, 2.0]))
    with pytest.raises(ValueError, match="Priors must be non-negative"):
        clf.fit(X, y)
def test_gnb_priors():
    """Test whether the class prior override is properly used"""
    clf = GaussianNB(priors=np.array([0.3, 0.7])).fit(X, y)
    # Expected probabilities pinned from a previous run with these priors.
    assert_array_almost_equal(
        clf.predict_proba([[-0.1, -0.1]]),
        np.array([[0.825303662161683, 0.174696337838317]]),
        8,
    )
    # The user-supplied priors must be stored verbatim.
    assert_array_almost_equal(clf.class_prior_, np.array([0.3, 0.7]))
def test_gnb_priors_sum_isclose():
    # Test that a prior vector summing to (approximately) one is accepted.
    X = np.array(
        [
            [-1, -1],
            [-2, -1],
            [-3, -2],
            [-4, -5],
            [-5, -4],
            [1, 1],
            [2, 1],
            [3, 2],
            [4, 4],
            [5, 5],
        ]
    )
    # These priors sum to 1.0 only up to floating-point rounding.
    priors = np.array([0.08, 0.14, 0.03, 0.16, 0.11, 0.16, 0.07, 0.14, 0.11, 0.0])
    Y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    clf = GaussianNB(priors=priors)
    # smoke test for issue #9633
    clf.fit(X, Y)
def test_gnb_wrong_nb_priors():
    """A priors vector whose length differs from n_classes must raise."""
    clf = GaussianNB(priors=np.array([0.25, 0.25, 0.25, 0.25]))
    expected = "Number of priors must match number of classes"
    with pytest.raises(ValueError, match=expected):
        clf.fit(X, y)
def test_gnb_prior_greater_one():
    """Fitting must fail when the provided priors do not sum to one."""
    bad_priors = np.array([2.0, 1.0])
    with pytest.raises(ValueError, match="The sum of the priors should be 1"):
        GaussianNB(priors=bad_priors).fit(X, y)
def test_gnb_prior_large_bias():
    """A heavily skewed class prior should dominate the prediction."""
    biased_clf = GaussianNB(priors=np.array([0.01, 0.99])).fit(X, y)
    # With 99% prior mass on the second class, this point is assigned to it.
    assert biased_clf.predict([[-0.1, -0.1]]) == np.array([2])
def test_gnb_check_update_with_no_data():
    """_update_mean_variance with an empty batch must leave stats untouched."""
    n_past = 100
    prior_mean = 0.0
    prior_var = 1.0
    # A zero-row batch with the right number of columns.
    no_samples = np.empty((0, X.shape[1]))
    new_mean, new_var = GaussianNB._update_mean_variance(
        n_past, prior_mean, prior_var, no_samples
    )
    assert new_mean == prior_mean
    assert new_var == prior_var
def test_gnb_partial_fit():
    """partial_fit on all data at once, or in two halves, must match fit."""
    clf = GaussianNB().fit(X, y)
    # One-shot partial_fit with all classes declared up front.
    clf_pf = GaussianNB().partial_fit(X, y, np.unique(y))
    assert_array_almost_equal(clf.theta_, clf_pf.theta_)
    assert_array_almost_equal(clf.var_, clf_pf.var_)
    assert_array_almost_equal(clf.class_prior_, clf_pf.class_prior_)
    # Incremental fitting over two interleaved halves must agree as well.
    clf_pf2 = GaussianNB().partial_fit(X[0::2, :], y[0::2], np.unique(y))
    clf_pf2.partial_fit(X[1::2], y[1::2])
    assert_array_almost_equal(clf.theta_, clf_pf2.theta_)
    assert_array_almost_equal(clf.var_, clf_pf2.var_)
    assert_array_almost_equal(clf.class_prior_, clf_pf2.class_prior_)
def test_gnb_naive_bayes_scale_invariance():
    """Predictions should be invariant to uniform feature scaling."""
    iris = load_iris()
    data, target = iris.data, iris.target
    predictions = [
        GaussianNB().fit(scale * data, target).predict(scale * data)
        for scale in [1e-10, 1, 1e10]
    ]
    assert_array_equal(predictions[0], predictions[1])
    assert_array_equal(predictions[1], predictions[2])
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_prior(DiscreteNaiveBayes, global_random_seed):
    """Class log-priors should match the empirical class frequencies."""
    X2, y2 = get_random_integer_x_three_classes_y(global_random_seed)
    fitted = DiscreteNaiveBayes().fit(X2, y2)
    # Three balanced classes over six samples -> log(1/3) per class.
    expected_log_prior = np.log(np.array([2, 2, 2]) / 6.0)
    assert_array_almost_equal(expected_log_prior, fitted.class_log_prior_, 8)
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_partial_fit(DiscreteNaiveBayes):
    """A single partial_fit, or several chained ones, must match plain fit."""
    clf1 = DiscreteNaiveBayes()
    clf1.fit([[0, 1], [1, 0], [1, 1]], [0, 1, 1])
    # One partial_fit over all the data, classes declared up front.
    clf2 = DiscreteNaiveBayes()
    clf2.partial_fit([[0, 1], [1, 0], [1, 1]], [0, 1, 1], classes=[0, 1])
    assert_array_equal(clf1.class_count_, clf2.class_count_)
    if DiscreteNaiveBayes is CategoricalNB:
        # CategoricalNB stores one count matrix per feature, not a single array.
        for i in range(len(clf1.category_count_)):
            assert_array_equal(clf1.category_count_[i], clf2.category_count_[i])
    else:
        assert_array_equal(clf1.feature_count_, clf2.feature_count_)
    clf3 = DiscreteNaiveBayes()
    # all categories have to appear in the first partial fit
    clf3.partial_fit([[0, 1]], [0], classes=[0, 1])
    clf3.partial_fit([[1, 0]], [1])
    clf3.partial_fit([[1, 1]], [1])
    assert_array_equal(clf1.class_count_, clf3.class_count_)
    if DiscreteNaiveBayes is CategoricalNB:
        # the categories for each feature of CategoricalNB are mapped to an
        # index chronologically with each call of partial fit and therefore
        # the category_count matrices cannot be compared for equality
        for i in range(len(clf1.category_count_)):
            assert_array_equal(
                clf1.category_count_[i].shape, clf3.category_count_[i].shape
            )
            assert_array_equal(
                np.sum(clf1.category_count_[i], axis=1),
                np.sum(clf3.category_count_[i], axis=1),
            )
        # assert category 0 occurs 1x in the first class and 0x in the 2nd
        # class
        assert_array_equal(clf1.category_count_[0][0], np.array([1, 0]))
        # assert category 1 occurs 0x in the first class and 2x in the 2nd
        # class
        assert_array_equal(clf1.category_count_[0][1], np.array([0, 2]))
        # assert category 0 occurs 0x in the first class and 1x in the 2nd
        # class
        assert_array_equal(clf1.category_count_[1][0], np.array([0, 1]))
        # assert category 1 occurs 1x in the first class and 1x in the 2nd
        # class
        assert_array_equal(clf1.category_count_[1][1], np.array([1, 1]))
    else:
        assert_array_equal(clf1.feature_count_, clf3.feature_count_)
@pytest.mark.parametrize("NaiveBayes", ALL_NAIVE_BAYES_CLASSES)
def test_NB_partial_fit_no_first_classes(NaiveBayes, global_random_seed):
    """`classes` is mandatory on the first partial_fit and frozen afterwards."""
    # classes is required for first call to partial fit
    X2, y2 = get_random_integer_x_three_classes_y(global_random_seed)
    with pytest.raises(
        ValueError, match="classes must be passed on the first call to partial_fit."
    ):
        NaiveBayes().partial_fit(X2, y2)
    # check consistency of consecutive classes values
    clf = NaiveBayes()
    clf.partial_fit(X2, y2, classes=np.unique(y2))
    # Passing a different set of classes on a later call must be rejected.
    with pytest.raises(
        ValueError, match="is not the same as on last call to partial_fit"
    ):
        clf.partial_fit(X2, y2, classes=np.arange(42))
def test_discretenb_predict_proba():
    """predict_proba of BernoulliNB/MultinomialNB has the right shape and
    rows that sum to one, for binary and multiclass targets."""
    # Test discrete NB classes' probability scores
    # The 100s below distinguish Bernoulli from multinomial.
    # FIXME: write a test to show this.
    X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
    X_multinomial = [[0, 1], [1, 3], [4, 0]]
    # test binary case (1-d output)
    y = [0, 0, 2]  # 2 is regression test for binary case, 02e673
    for DiscreteNaiveBayes, X in zip(
        [BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]
    ):
        clf = DiscreteNaiveBayes().fit(X, y)
        assert clf.predict(X[-1:]) == 2
        # Two classes present -> two probability columns.
        assert clf.predict_proba([X[0]]).shape == (1, 2)
        assert_array_almost_equal(
            clf.predict_proba(X[:2]).sum(axis=1), np.array([1.0, 1.0]), 6
        )
    # test multiclass case (2-d output, must sum to one)
    y = [0, 1, 2]
    for DiscreteNaiveBayes, X in zip(
        [BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]
    ):
        clf = DiscreteNaiveBayes().fit(X, y)
        assert clf.predict_proba(X[0:1]).shape == (1, 3)
        assert clf.predict_proba(X[:2]).shape == (2, 3)
        assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1)
        assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1)
        # The class priors themselves must also form a distribution.
        assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_uniform_prior(DiscreteNaiveBayes):
    """With fit_prior=False and no class_prior, the prior must be uniform."""
    estimator = DiscreteNaiveBayes()
    estimator.set_params(fit_prior=False)
    # Deliberately imbalanced classes (2 vs 1): the prior must ignore that.
    estimator.fit([[0], [0], [1]], [0, 0, 1])
    fitted_prior = np.exp(estimator.class_log_prior_)
    assert_array_almost_equal(fitted_prior, np.array([0.5, 0.5]))
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_provide_prior(DiscreteNaiveBayes):
    """A user-supplied class_prior is used verbatim and validated for size."""
    # Test whether discrete NB classes use provided prior
    clf = DiscreteNaiveBayes(class_prior=[0.5, 0.5])
    clf.fit([[0], [0], [1]], [0, 0, 1])
    prior = np.exp(clf.class_log_prior_)
    assert_array_almost_equal(prior, np.array([0.5, 0.5]))
    # Inconsistent number of classes with prior
    msg = "Number of priors must match number of classes"
    with pytest.raises(ValueError, match=msg):
        clf.fit([[0], [1], [2]], [0, 1, 2])
    # `classes` here disagrees with the classes seen by the earlier fit,
    # so partial_fit must refuse to continue.
    msg = "is not the same as on last call to partial_fit"
    with pytest.raises(ValueError, match=msg):
        clf.partial_fit([[0], [1]], [0, 1], classes=[0, 1, 1])
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_provide_prior_with_partial_fit(DiscreteNaiveBayes):
    """fit and chained partial_fit must agree on the class prior, whether the
    prior is learned (None) or user-supplied."""
    # Test whether discrete NB classes use provided prior
    # when using partial_fit
    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415
    )
    for prior in [None, [0.3, 0.3, 0.4]]:
        clf_full = DiscreteNaiveBayes(class_prior=prior)
        clf_full.fit(iris.data, iris.target)
        clf_partial = DiscreteNaiveBayes(class_prior=prior)
        clf_partial.partial_fit(iris_data1, iris_target1, classes=[0, 1, 2])
        clf_partial.partial_fit(iris_data2, iris_target2)
        assert_array_almost_equal(
            clf_full.class_log_prior_, clf_partial.class_log_prior_
        )
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
def test_discretenb_sample_weight_multiclass(DiscreteNaiveBayes):
    """sample_weight is honored by both fit and partial_fit.

    Rows 2 and 3 are identical features with labels 0 and 1; the doubled
    weight on the label-1 copy makes the prediction for that row 1, not 0.
    """
    # check shape consistency for number of samples at fit time
    X = [
        [0, 0, 1],
        [0, 1, 1],
        [0, 1, 1],
        [1, 0, 0],
    ]
    y = [0, 0, 1, 2]
    sample_weight = np.array([1, 1, 2, 2], dtype=np.float64)
    sample_weight /= sample_weight.sum()
    clf = DiscreteNaiveBayes().fit(X, y, sample_weight=sample_weight)
    assert_array_equal(clf.predict(X), [0, 1, 1, 2])
    # Check sample weight using the partial_fit method
    clf = DiscreteNaiveBayes()
    clf.partial_fit(X[:2], y[:2], classes=[0, 1, 2], sample_weight=sample_weight[:2])
    clf.partial_fit(X[2:3], y[2:3], sample_weight=sample_weight[2:3])
    clf.partial_fit(X[3:], y[3:], sample_weight=sample_weight[3:])
    assert_array_equal(clf.predict(X), [0, 1, 1, 2])
@pytest.mark.parametrize("DiscreteNaiveBayes", DISCRETE_NAIVE_BAYES_CLASSES)
@pytest.mark.parametrize("use_partial_fit", [False, True])
@pytest.mark.parametrize("train_on_single_class_y", [False, True])
def test_discretenb_degenerate_one_class_case(
    DiscreteNaiveBayes,
    use_partial_fit,
    train_on_single_class_y,
):
    """Fitted attributes keep a first-axis length of n_classes even when the
    training set contains a single class."""
    # Most array attributes of a discrete naive Bayes classifier should have a
    # first-axis length equal to the number of classes. Exceptions include:
    # ComplementNB.feature_all_, CategoricalNB.n_categories_.
    # Confirm that this is the case for binary problems and the degenerate
    # case of a single class in the training set, when fitting with `fit` or
    # `partial_fit`.
    # Non-regression test for handling degenerate one-class case:
    # https://github.com/scikit-learn/scikit-learn/issues/18974
    X = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    y = [1, 1, 2]
    if train_on_single_class_y:
        # Drop the only class-2 sample, leaving a one-class training set.
        X = X[:-1]
        y = y[:-1]
    classes = sorted(list(set(y)))
    num_classes = len(classes)
    clf = DiscreteNaiveBayes()
    if use_partial_fit:
        clf.partial_fit(X, y, classes=classes)
    else:
        clf.fit(X, y)
    assert clf.predict(X[:1]) == y[0]
    # Check that attributes have expected first-axis lengths
    attribute_names = [
        "classes_",
        "class_count_",
        "class_log_prior_",
        "feature_count_",
        "feature_log_prob_",
    ]
    for attribute_name in attribute_names:
        attribute = getattr(clf, attribute_name, None)
        if attribute is None:
            # CategoricalNB has no feature_count_ attribute
            continue
        if isinstance(attribute, np.ndarray):
            assert attribute.shape[0] == num_classes
        else:
            # CategoricalNB.feature_log_prob_ is a list of arrays
            for element in attribute:
                assert element.shape[0] == num_classes
@pytest.mark.parametrize("kind", ("dense", "sparse"))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_mnnb(kind, global_random_seed, csr_container):
    """MultinomialNB end-to-end: input validation, fit/predict round-trip,
    proba/log-proba consistency, and fit vs. partial_fit equivalence, for
    both dense and sparse inputs."""
    # Test Multinomial Naive Bayes classification.
    # This checks that MultinomialNB implements fit and predict and returns
    # correct values for a simple toy dataset.
    X2, y2 = get_random_integer_x_three_classes_y(global_random_seed)
    if kind == "dense":
        X = X2
    elif kind == "sparse":
        X = csr_container(X2)
    # Check the ability to predict the learning set.
    clf = MultinomialNB()
    # Negated counts are invalid input for a multinomial model.
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        clf.fit(-X, y2)
    y_pred = clf.fit(X, y2).predict(X)
    assert_array_equal(y_pred, y2)
    # Verify that np.log(clf.predict_proba(X)) gives the same results as
    # clf.predict_log_proba(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)
    # Check that incremental fitting yields the same results
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])
    y_pred2 = clf2.predict(X)
    assert_array_equal(y_pred2, y2)
    y_pred_proba2 = clf2.predict_proba(X)
    y_pred_log_proba2 = clf2.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8)
    assert_array_almost_equal(y_pred_proba2, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba)
    # Partial fit on the whole data at once should be the same as fit too
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))
    y_pred3 = clf3.predict(X)
    assert_array_equal(y_pred3, y2)
    y_pred_proba3 = clf3.predict_proba(X)
    y_pred_log_proba3 = clf3.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8)
    assert_array_almost_equal(y_pred_proba3, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)
def test_mnb_prior_unobserved_targets():
    """Declaring a class with no samples yet must not trigger RuntimeWarnings
    (e.g. log of zero) and the class must become predictable once observed."""
    # test smoothing of prior for yet unobserved targets
    # Create toy training data
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])
    clf = MultinomialNB()
    # Escalate RuntimeWarning to an error so any numeric issue fails the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        clf.partial_fit(X, y, classes=[0, 1, 2])
    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 0
    # add a training example with previously unobserved class
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        clf.partial_fit([[1, 1]], [2])
    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 2
def test_bnb():
    """BernoulliNB(alpha=1.0) reproduces the hand-computed priors, feature
    probabilities, and posterior from the IIR textbook example."""
    # Tests that BernoulliNB when alpha=1.0 gives the same values as
    # those given for the toy example in Manning, Raghavan, and
    # Schuetze's "Introduction to Information Retrieval" book:
    # https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
    # Training data points are:
    # Chinese Beijing Chinese (class: China)
    # Chinese Chinese Shanghai (class: China)
    # Chinese Macao (class: China)
    # Tokyo Japan Chinese (class: Japan)
    # Features are Beijing, Chinese, Japan, Macao, Shanghai, and Tokyo
    X = np.array(
        [[1, 1, 0, 0, 0, 0], [0, 1, 0, 0, 1, 0], [0, 1, 0, 1, 0, 0], [0, 1, 1, 0, 0, 1]]
    )
    # Classes are China (0), Japan (1)
    Y = np.array([0, 0, 0, 1])
    # Fit BernoulliBN w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)
    # Check the class prior is correct
    class_prior = np.array([0.75, 0.25])
    assert_array_almost_equal(np.exp(clf.class_log_prior_), class_prior)
    # Check the feature probabilities are correct
    feature_prob = np.array(
        [
            [0.4, 0.8, 0.2, 0.4, 0.4, 0.2],
            [1 / 3.0, 2 / 3.0, 2 / 3.0, 1 / 3.0, 1 / 3.0, 2 / 3.0],
        ]
    )
    assert_array_almost_equal(np.exp(clf.feature_log_prob_), feature_prob)
    # Testing data point is:
    # Chinese Chinese Chinese Tokyo Japan
    X_test = np.array([[0, 1, 1, 0, 0, 1]])
    # Check the predictive probabilities are correct
    unnorm_predict_proba = np.array([[0.005183999999999999, 0.02194787379972565]])
    predict_proba = unnorm_predict_proba / np.sum(unnorm_predict_proba)
    assert_array_almost_equal(clf.predict_proba(X_test), predict_proba)
def test_bnb_feature_log_prob():
    """feature_log_prob_ equals log((count + 1) / (class_count + 2)) for
    alpha=1, i.e. Laplace-smoothed Bernoulli estimates."""
    # Test for issue #4268.
    # Tests that the feature log prob value computed by BernoulliNB when
    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
    # and Schuetze's "Introduction to Information Retrieval" book:
    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
    X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    Y = np.array([0, 0, 1, 2, 2])
    # Fit Bernoulli NB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)
    # Manually form the (log) numerator and denominator that
    # constitute P(feature presence | class)
    num = np.log(clf.feature_count_ + 1.0)
    # Tile the per-class denominator across features to match num's shape.
    denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T
    # Check manual estimate matches
    assert_array_almost_equal(clf.feature_log_prob_, (num - denom))
def test_cnb():
    """ComplementNB(alpha=1.0) matches the hand-computed complement weights
    (and their normalized variant with norm=True) from Rennie et al. (2003),
    and rejects negative input."""
    # Tests ComplementNB when alpha=1.0 for the toy example in Manning,
    # Raghavan, and Schuetze's "Introduction to Information Retrieval" book:
    # https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
    # Training data points are:
    # Chinese Beijing Chinese (class: China)
    # Chinese Chinese Shanghai (class: China)
    # Chinese Macao (class: China)
    # Tokyo Japan Chinese (class: Japan)
    # Features are Beijing, Chinese, Japan, Macao, Shanghai, and Tokyo.
    X = np.array(
        [[1, 1, 0, 0, 0, 0], [0, 1, 0, 0, 1, 0], [0, 1, 0, 1, 0, 0], [0, 1, 1, 0, 0, 1]]
    )
    # Classes are China (0), Japan (1).
    Y = np.array([0, 0, 0, 1])
    # Check that weights are correct. See steps 4-6 in Table 4 of
    # Rennie et al. (2003).
    theta = np.array(
        [
            [
                (0 + 1) / (3 + 6),
                (1 + 1) / (3 + 6),
                (1 + 1) / (3 + 6),
                (0 + 1) / (3 + 6),
                (0 + 1) / (3 + 6),
                (1 + 1) / (3 + 6),
            ],
            [
                (1 + 1) / (6 + 6),
                (3 + 1) / (6 + 6),
                (0 + 1) / (6 + 6),
                (1 + 1) / (6 + 6),
                (1 + 1) / (6 + 6),
                (0 + 1) / (6 + 6),
            ],
        ]
    )
    # Complement weights are negated log-probabilities; the normed variant
    # divides each row by its sum.
    weights = np.zeros(theta.shape)
    normed_weights = np.zeros(theta.shape)
    for i in range(2):
        weights[i] = -np.log(theta[i])
        normed_weights[i] = weights[i] / weights[i].sum()
    # Verify inputs are nonnegative.
    clf = ComplementNB(alpha=1.0)
    msg = re.escape("Negative values in data passed to ComplementNB (input X)")
    with pytest.raises(ValueError, match=msg):
        clf.fit(-X, Y)
    clf.fit(X, Y)
    # Check that counts/weights are correct.
    feature_count = np.array([[1, 3, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1]])
    assert_array_equal(clf.feature_count_, feature_count)
    class_count = np.array([3, 1])
    assert_array_equal(clf.class_count_, class_count)
    feature_all = np.array([1, 4, 1, 1, 1, 1])
    assert_array_equal(clf.feature_all_, feature_all)
    assert_array_almost_equal(clf.feature_log_prob_, weights)
    clf = ComplementNB(alpha=1.0, norm=True)
    clf.fit(X, Y)
    assert_array_almost_equal(clf.feature_log_prob_, normed_weights)
def test_categoricalnb(global_random_seed):
    """CategoricalNB basics: training-set prediction, n_categories_,
    rejection of negative inputs, alpha smoothing, and sample_weight."""
    # Check the ability to predict the training set.
    clf = CategoricalNB()
    X2, y2 = get_random_integer_x_three_classes_y(global_random_seed)
    y_pred = clf.fit(X2, y2).predict(X2)
    assert_array_equal(y_pred, y2)
    X3 = np.array([[1, 4], [2, 5]])
    y3 = np.array([1, 2])
    clf = CategoricalNB(alpha=1, fit_prior=False)
    clf.fit(X3, y3)
    # Categories are indexed 0..max, so maxima 2 and 5 give 3 and 6 categories.
    assert_array_equal(clf.n_categories_, np.array([3, 6]))
    # Check error is raised for X with negative entries
    X = np.array([[0, -1]])
    y = np.array([1])
    error_msg = re.escape("Negative values in data passed to CategoricalNB (input X)")
    with pytest.raises(ValueError, match=error_msg):
        clf.predict(X)
    with pytest.raises(ValueError, match=error_msg):
        clf.fit(X, y)
    # Test alpha
    X3_test = np.array([[2, 5]])
    # alpha=1 increases the count of all categories by one so the final
    # probability for each category is not 50/50 but 1/3 to 2/3
    bayes_numerator = np.array([[1 / 3 * 1 / 3, 2 / 3 * 2 / 3]])
    bayes_denominator = bayes_numerator.sum()
    assert_array_almost_equal(
        clf.predict_proba(X3_test), bayes_numerator / bayes_denominator
    )
    # Assert category_count has counted all features
    assert len(clf.category_count_) == X3.shape[1]
    # Check sample_weight
    X = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
    y = np.array([1, 1, 2, 2])
    clf = CategoricalNB(alpha=1, fit_prior=False)
    clf.fit(X, y)
    assert_array_equal(clf.predict(np.array([[0, 0]])), np.array([1]))
    assert_array_equal(clf.n_categories_, np.array([2, 2]))
    # The heavy weight on the class-2 copy of [0, 0] flips the prediction,
    # regardless of the overall weight scale.
    for factor in [1.0, 0.3, 5, 0.0001]:
        X = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
        y = np.array([1, 1, 2, 2])
        sample_weight = np.array([1, 1, 10, 0.1]) * factor
        clf = CategoricalNB(alpha=1, fit_prior=False)
        clf.fit(X, y, sample_weight=sample_weight)
        assert_array_equal(clf.predict(np.array([[0, 0]])), np.array([2]))
        assert_array_equal(clf.n_categories_, np.array([2, 2]))
@pytest.mark.parametrize(
    "min_categories, exp_X1_count, exp_X2_count, new_X, exp_n_categories_",
    [
        # check min_categories with int > observed categories
        (
            3,
            np.array([[2, 0, 0], [1, 1, 0]]),
            np.array([[1, 1, 0], [1, 1, 0]]),
            np.array([[0, 2]]),
            np.array([3, 3]),
        ),
        # check with list input
        (
            [3, 4],
            np.array([[2, 0, 0], [1, 1, 0]]),
            np.array([[1, 1, 0, 0], [1, 1, 0, 0]]),
            np.array([[0, 3]]),
            np.array([3, 4]),
        ),
        # check min_categories with min less than actual
        # (was a list wrapped in parentheses; normalized to a tuple for
        # consistency with the other cases — parametrize unpacks both the same)
        (
            1,
            np.array([[2, 0], [1, 1]]),
            np.array([[1, 1], [1, 1]]),
            np.array([[0, 1]]),
            np.array([2, 2]),
        ),
    ],
)
def test_categoricalnb_with_min_categories(
    min_categories, exp_X1_count, exp_X2_count, new_X, exp_n_categories_
):
    """`min_categories` (scalar or per-feature list) pads category counts and
    n_categories_, and never shrinks below the observed number of categories."""
    X_n_categories = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
    y_n_categories = np.array([1, 1, 2, 2])
    expected_prediction = np.array([1])
    clf = CategoricalNB(alpha=1, fit_prior=False, min_categories=min_categories)
    clf.fit(X_n_categories, y_n_categories)
    # category_count_ holds one (n_classes, n_categories) matrix per feature.
    X1_count, X2_count = clf.category_count_
    assert_array_equal(X1_count, exp_X1_count)
    assert_array_equal(X2_count, exp_X2_count)
    predictions = clf.predict(new_X)
    assert_array_equal(predictions, expected_prediction)
    assert_array_equal(clf.n_categories_, exp_n_categories_)
@pytest.mark.parametrize(
    "min_categories, error_msg",
    [
        ([[3, 2], [2, 4]], "'min_categories' should have shape"),
    ],
)
def test_categoricalnb_min_categories_errors(min_categories, error_msg):
    """A `min_categories` of the wrong shape must raise ValueError at fit."""
    features = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
    targets = np.array([1, 1, 2, 2])
    estimator = CategoricalNB(alpha=1, fit_prior=False, min_categories=min_categories)
    with pytest.raises(ValueError, match=error_msg):
        estimator.fit(features, targets)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_alpha(csr_container):
    """With force_alpha=False, alpha=0 is clipped to 1e-10 with a warning and
    probabilities stay finite, for dense and sparse inputs."""
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.0, force_alpha=False)
    msg = "alpha too small will result in numeric errors, setting alpha = 1.0e-10"
    with pytest.warns(UserWarning, match=msg):
        nb.partial_fit(X, y, classes=[0, 1])
    with pytest.warns(UserWarning, match=msg):
        nb.fit(X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
    nb = MultinomialNB(alpha=0.0, force_alpha=False)
    with pytest.warns(UserWarning, match=msg):
        nb.partial_fit(X, y, classes=[0, 1])
    with pytest.warns(UserWarning, match=msg):
        nb.fit(X, y)
    prob = np.array([[2.0 / 3, 1.0 / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
    nb = CategoricalNB(alpha=0.0, force_alpha=False)
    with pytest.warns(UserWarning, match=msg):
        nb.fit(X, y)
    prob = np.array([[1.0, 0.0], [0.0, 1.0]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
    # Test sparse X
    X = csr_container(X)
    nb = BernoulliNB(alpha=0.0, force_alpha=False)
    with pytest.warns(UserWarning, match=msg):
        nb.fit(X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
    nb = MultinomialNB(alpha=0.0, force_alpha=False)
    with pytest.warns(UserWarning, match=msg):
        nb.fit(X, y)
    prob = np.array([[2.0 / 3, 1.0 / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
def test_alpha_vector():
    """Per-feature alpha arrays: valid lengths work, negative values and
    length mismatches raise, and too-small entries are clipped to 1e-10."""
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    # Setting alpha=np.array with same length
    # as number of features should be fine
    alpha = np.array([1, 2])
    nb = MultinomialNB(alpha=alpha, force_alpha=False)
    nb.partial_fit(X, y, classes=[0, 1])
    # Test feature probabilities uses pseudo-counts (alpha)
    feature_prob = np.array([[1 / 2, 1 / 2], [2 / 5, 3 / 5]])
    assert_array_almost_equal(nb.feature_log_prob_, np.log(feature_prob))
    # Test predictions
    prob = np.array([[5 / 9, 4 / 9], [25 / 49, 24 / 49]])
    assert_array_almost_equal(nb.predict_proba(X), prob)
    # Test alpha non-negative
    alpha = np.array([1.0, -0.1])
    m_nb = MultinomialNB(alpha=alpha, force_alpha=False)
    expected_msg = "All values in alpha must be greater than 0."
    with pytest.raises(ValueError, match=expected_msg):
        m_nb.fit(X, y)
    # Test that too small pseudo-counts are replaced
    ALPHA_MIN = 1e-10
    alpha = np.array([ALPHA_MIN / 2, 0.5])
    m_nb = MultinomialNB(alpha=alpha, force_alpha=False)
    m_nb.partial_fit(X, y, classes=[0, 1])
    assert_array_almost_equal(m_nb._check_alpha(), [ALPHA_MIN, 0.5], decimal=12)
    # Test correct dimensions
    alpha = np.array([1.0, 2.0, 3.0])
    m_nb = MultinomialNB(alpha=alpha, force_alpha=False)
    expected_msg = "When alpha is an array, it should contains `n_features`"
    with pytest.raises(ValueError, match=expected_msg):
        m_nb.fit(X, y)
def test_check_accuracy_on_digits():
    """Cross-validated accuracy floors on digits for each NB variant."""
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    X, y = load_digits(return_X_y=True)
    # Restrict to the hard 3-vs-8 binary sub-problem as well.
    binary_3v8 = np.logical_or(y == 3, y == 8)
    X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]
    # Multinomial NB
    scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
    assert scores.mean() > 0.86
    scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
    assert scores.mean() > 0.94
    # Bernoulli NB
    scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
    assert scores.mean() > 0.83
    scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
    assert scores.mean() > 0.92
    # Gaussian NB
    scores = cross_val_score(GaussianNB(), X, y, cv=10)
    assert scores.mean() > 0.77
    scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10)
    assert scores.mean() > 0.89
    scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
    assert scores.mean() > 0.86
def test_check_alpha():
    """The provided value for alpha must only be
    used if alpha < _ALPHA_MIN and force_alpha is True.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/10772
    """
    _ALPHA_MIN = 1e-10
    # force_alpha=True: the (too small) user-provided alpha is kept as-is.
    b = BernoulliNB(alpha=0, force_alpha=True)
    assert b._check_alpha() == 0
    alphas = np.array([0.0, 1.0])
    b = BernoulliNB(alpha=alphas, force_alpha=True)
    # We manually set `n_features_in_` not to have `_check_alpha` err
    b.n_features_in_ = alphas.shape[0]
    assert_array_equal(b._check_alpha(), alphas)
    # force_alpha=False: too-small values are clipped to _ALPHA_MIN with a
    # warning. (A verbatim duplicate of the scalar check below existed here;
    # it added no coverage and was removed.)
    msg = (
        "alpha too small will result in numeric errors, setting alpha = %.1e"
        % _ALPHA_MIN
    )
    b = BernoulliNB(alpha=0, force_alpha=False)
    with pytest.warns(UserWarning, match=msg):
        assert b._check_alpha() == _ALPHA_MIN
    b = BernoulliNB(alpha=alphas, force_alpha=False)
    # We manually set `n_features_in_` not to have `_check_alpha` err
    b.n_features_in_ = alphas.shape[0]
    with pytest.warns(UserWarning, match=msg):
        assert_array_equal(b._check_alpha(), np.array([_ALPHA_MIN, 1.0]))
@pytest.mark.parametrize("Estimator", ALL_NAIVE_BAYES_CLASSES)
def test_predict_joint_proba(Estimator, global_random_seed):
    """predict_joint_log_proba minus its per-row logsumexp must equal
    predict_log_proba (i.e. joint = posterior before normalization)."""
    X2, y2 = get_random_integer_x_three_classes_y(global_random_seed)
    est = Estimator().fit(X2, y2)
    jll = est.predict_joint_log_proba(X2)
    # Normalize the joint log-likelihood per sample.
    log_prob_x = logsumexp(jll, axis=1)
    log_prob_x_y = jll - np.atleast_2d(log_prob_x).T
    assert_allclose(est.predict_log_proba(X2), log_prob_x_y)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,403 @@
from importlib import import_module
from inspect import signature
from numbers import Integral, Real
import pytest
from sklearn.utils._param_validation import (
Interval,
InvalidParameterError,
generate_invalid_param_val,
generate_valid_param,
make_constraint,
)
def _get_func_info(func_module):
module_name, func_name = func_module.rsplit(".", 1)
module = import_module(module_name)
func = getattr(module, func_name)
func_sig = signature(func)
func_params = [
p.name
for p in func_sig.parameters.values()
if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
]
# The parameters `*args` and `**kwargs` are ignored since we cannot generate
# constraints.
required_params = [
p.name
for p in func_sig.parameters.values()
if p.default is p.empty and p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
]
return func, func_name, func_params, required_params
def _check_function_param_validation(
    func, func_name, func_params, required_params, parameter_constraints
):
    """Check that an informative error is raised when the value of a parameter does not
    have an appropriate type or value.

    Parameters
    ----------
    func : callable
        The function under test.
    func_name : str
        Its name, used in the expected error messages.
    func_params : list of str
        All named parameters of ``func``.
    required_params : list of str
        Parameters without defaults; valid values are generated for them so
        the call can reach validation of the parameter being probed.
    parameter_constraints : dict
        Mapping of parameter name to its declared constraints (or the
        sentinel string ``"no_validation"``).
    """
    # generate valid values for the required parameters
    valid_required_params = {}
    for param_name in required_params:
        if parameter_constraints[param_name] == "no_validation":
            valid_required_params[param_name] = 1
        else:
            valid_required_params[param_name] = generate_valid_param(
                make_constraint(parameter_constraints[param_name][0])
            )
    # check that there is a constraint for each parameter
    if func_params:
        validation_params = parameter_constraints.keys()
        unexpected_params = set(validation_params) - set(func_params)
        missing_params = set(func_params) - set(validation_params)
        err_msg = (
            "Mismatch between _parameter_constraints and the parameters of"
            f" {func_name}.\nConsider the unexpected parameters {unexpected_params} and"
            f" expected but missing parameters {missing_params}\n"
        )
        assert set(validation_params) == set(func_params), err_msg
    # this object does not have a valid type for sure for all params
    param_with_bad_type = type("BadType", (), {})()
    for param_name in func_params:
        constraints = parameter_constraints[param_name]
        if constraints == "no_validation":
            # This parameter is not validated
            continue
        # Mixing an interval of reals and an interval of integers must be avoided.
        if any(
            isinstance(constraint, Interval) and constraint.type == Integral
            for constraint in constraints
        ) and any(
            isinstance(constraint, Interval) and constraint.type == Real
            for constraint in constraints
        ):
            raise ValueError(
                f"The constraint for parameter {param_name} of {func_name} can't have a"
                " mix of intervals of Integral and Real types. Use the type"
                " RealNotInt instead of Real."
            )
        match = (
            rf"The '{param_name}' parameter of {func_name} must be .* Got .* instead."
        )
        err_msg = (
            f"{func_name} does not raise an informative error message when the "
            f"parameter {param_name} does not have a valid type. If any Python type "
            "is valid, the constraint should be 'no_validation'."
        )
        # First, check that the error is raised if param doesn't match any valid type.
        # NOTE: pytest.fail is only reached when func did NOT raise; the resulting
        # Failed exception is not an InvalidParameterError, so the test fails.
        with pytest.raises(InvalidParameterError, match=match):
            func(**{**valid_required_params, param_name: param_with_bad_type})
            pytest.fail(err_msg)
        # Then, for constraints that are more than a type constraint, check that the
        # error is raised if param does match a valid type but does not match any valid
        # value for this type.
        constraints = [make_constraint(constraint) for constraint in constraints]
        for constraint in constraints:
            try:
                bad_value = generate_invalid_param_val(constraint)
            except NotImplementedError:
                # Some constraints cannot produce an invalid-but-well-typed value.
                continue
            err_msg = (
                f"{func_name} does not raise an informative error message when the "
                f"parameter {param_name} does not have a valid value.\n"
                "Constraints should be disjoint. For instance "
                "[StrOptions({'a_string'}), str] is not a acceptable set of "
                "constraint because generating an invalid string for the first "
                "constraint will always produce a valid string for the second "
                "constraint."
            )
            with pytest.raises(InvalidParameterError, match=match):
                func(**{**valid_required_params, param_name: bad_value})
                pytest.fail(err_msg)
# Public functions (dotted paths) whose signatures must match their
# `_skl_parameter_constraints` declaration; each is checked by
# `test_function_param_validation` below.
PARAM_VALIDATION_FUNCTION_LIST = [
    "sklearn.calibration.calibration_curve",
    "sklearn.cluster.cluster_optics_dbscan",
    "sklearn.cluster.compute_optics_graph",
    "sklearn.cluster.estimate_bandwidth",
    "sklearn.cluster.kmeans_plusplus",
    "sklearn.cluster.cluster_optics_xi",
    "sklearn.cluster.ward_tree",
    "sklearn.covariance.empirical_covariance",
    "sklearn.covariance.ledoit_wolf_shrinkage",
    "sklearn.covariance.log_likelihood",
    "sklearn.covariance.shrunk_covariance",
    "sklearn.datasets.clear_data_home",
    "sklearn.datasets.dump_svmlight_file",
    "sklearn.datasets.fetch_20newsgroups",
    "sklearn.datasets.fetch_20newsgroups_vectorized",
    "sklearn.datasets.fetch_california_housing",
    "sklearn.datasets.fetch_covtype",
    "sklearn.datasets.fetch_kddcup99",
    "sklearn.datasets.fetch_lfw_pairs",
    "sklearn.datasets.fetch_lfw_people",
    "sklearn.datasets.fetch_olivetti_faces",
    "sklearn.datasets.fetch_rcv1",
    "sklearn.datasets.fetch_openml",
    "sklearn.datasets.fetch_species_distributions",
    "sklearn.datasets.get_data_home",
    "sklearn.datasets.load_breast_cancer",
    "sklearn.datasets.load_diabetes",
    "sklearn.datasets.load_digits",
    "sklearn.datasets.load_files",
    "sklearn.datasets.load_iris",
    "sklearn.datasets.load_linnerud",
    "sklearn.datasets.load_sample_image",
    "sklearn.datasets.load_svmlight_file",
    "sklearn.datasets.load_svmlight_files",
    "sklearn.datasets.load_wine",
    "sklearn.datasets.make_biclusters",
    "sklearn.datasets.make_blobs",
    "sklearn.datasets.make_checkerboard",
    "sklearn.datasets.make_circles",
    "sklearn.datasets.make_classification",
    "sklearn.datasets.make_friedman1",
    "sklearn.datasets.make_friedman2",
    "sklearn.datasets.make_friedman3",
    "sklearn.datasets.make_gaussian_quantiles",
    "sklearn.datasets.make_hastie_10_2",
    "sklearn.datasets.make_low_rank_matrix",
    "sklearn.datasets.make_moons",
    "sklearn.datasets.make_multilabel_classification",
    "sklearn.datasets.make_regression",
    "sklearn.datasets.make_s_curve",
    "sklearn.datasets.make_sparse_coded_signal",
    "sklearn.datasets.make_sparse_spd_matrix",
    "sklearn.datasets.make_sparse_uncorrelated",
    "sklearn.datasets.make_spd_matrix",
    "sklearn.datasets.make_swiss_roll",
    "sklearn.decomposition.sparse_encode",
    "sklearn.feature_extraction.grid_to_graph",
    "sklearn.feature_extraction.img_to_graph",
    "sklearn.feature_extraction.image.extract_patches_2d",
    "sklearn.feature_extraction.image.reconstruct_from_patches_2d",
    "sklearn.feature_selection.chi2",
    "sklearn.feature_selection.f_classif",
    "sklearn.feature_selection.f_regression",
    "sklearn.feature_selection.mutual_info_classif",
    "sklearn.feature_selection.mutual_info_regression",
    "sklearn.feature_selection.r_regression",
    "sklearn.inspection.partial_dependence",
    "sklearn.inspection.permutation_importance",
    "sklearn.isotonic.check_increasing",
    "sklearn.isotonic.isotonic_regression",
    "sklearn.linear_model.enet_path",
    "sklearn.linear_model.lars_path",
    "sklearn.linear_model.lars_path_gram",
    "sklearn.linear_model.lasso_path",
    "sklearn.linear_model.orthogonal_mp",
    "sklearn.linear_model.orthogonal_mp_gram",
    "sklearn.linear_model.ridge_regression",
    "sklearn.manifold.locally_linear_embedding",
    "sklearn.manifold.smacof",
    "sklearn.manifold.spectral_embedding",
    "sklearn.manifold.trustworthiness",
    "sklearn.metrics.accuracy_score",
    "sklearn.metrics.auc",
    "sklearn.metrics.average_precision_score",
    "sklearn.metrics.balanced_accuracy_score",
    "sklearn.metrics.brier_score_loss",
    "sklearn.metrics.calinski_harabasz_score",
    "sklearn.metrics.check_scoring",
    "sklearn.metrics.completeness_score",
    "sklearn.metrics.class_likelihood_ratios",
    "sklearn.metrics.classification_report",
    "sklearn.metrics.cluster.adjusted_mutual_info_score",
    "sklearn.metrics.cluster.contingency_matrix",
    "sklearn.metrics.cluster.entropy",
    "sklearn.metrics.cluster.fowlkes_mallows_score",
    "sklearn.metrics.cluster.homogeneity_completeness_v_measure",
    "sklearn.metrics.cluster.normalized_mutual_info_score",
    "sklearn.metrics.cluster.silhouette_samples",
    "sklearn.metrics.cluster.silhouette_score",
    "sklearn.metrics.cohen_kappa_score",
    "sklearn.metrics.confusion_matrix",
    "sklearn.metrics.consensus_score",
    "sklearn.metrics.coverage_error",
    "sklearn.metrics.d2_absolute_error_score",
    "sklearn.metrics.d2_log_loss_score",
    "sklearn.metrics.d2_pinball_score",
    "sklearn.metrics.d2_tweedie_score",
    "sklearn.metrics.davies_bouldin_score",
    "sklearn.metrics.dcg_score",
    "sklearn.metrics.det_curve",
    "sklearn.metrics.explained_variance_score",
    "sklearn.metrics.f1_score",
    "sklearn.metrics.fbeta_score",
    "sklearn.metrics.get_scorer",
    "sklearn.metrics.hamming_loss",
    "sklearn.metrics.hinge_loss",
    "sklearn.metrics.homogeneity_score",
    "sklearn.metrics.jaccard_score",
    "sklearn.metrics.label_ranking_average_precision_score",
    "sklearn.metrics.label_ranking_loss",
    "sklearn.metrics.log_loss",
    "sklearn.metrics.make_scorer",
    "sklearn.metrics.matthews_corrcoef",
    "sklearn.metrics.max_error",
    "sklearn.metrics.mean_absolute_error",
    "sklearn.metrics.mean_absolute_percentage_error",
    "sklearn.metrics.mean_gamma_deviance",
    "sklearn.metrics.mean_pinball_loss",
    "sklearn.metrics.mean_poisson_deviance",
    "sklearn.metrics.mean_squared_error",
    "sklearn.metrics.mean_squared_log_error",
    "sklearn.metrics.mean_tweedie_deviance",
    "sklearn.metrics.median_absolute_error",
    "sklearn.metrics.multilabel_confusion_matrix",
    "sklearn.metrics.mutual_info_score",
    "sklearn.metrics.ndcg_score",
    "sklearn.metrics.pair_confusion_matrix",
    "sklearn.metrics.adjusted_rand_score",
    "sklearn.metrics.pairwise.additive_chi2_kernel",
    "sklearn.metrics.pairwise.chi2_kernel",
    "sklearn.metrics.pairwise.cosine_distances",
    "sklearn.metrics.pairwise.cosine_similarity",
    "sklearn.metrics.pairwise.euclidean_distances",
    "sklearn.metrics.pairwise.haversine_distances",
    "sklearn.metrics.pairwise.laplacian_kernel",
    "sklearn.metrics.pairwise.linear_kernel",
    "sklearn.metrics.pairwise.manhattan_distances",
    "sklearn.metrics.pairwise.nan_euclidean_distances",
    "sklearn.metrics.pairwise.paired_cosine_distances",
    "sklearn.metrics.pairwise.paired_distances",
    "sklearn.metrics.pairwise.paired_euclidean_distances",
    "sklearn.metrics.pairwise.paired_manhattan_distances",
    "sklearn.metrics.pairwise.pairwise_distances_argmin_min",
    "sklearn.metrics.pairwise.pairwise_kernels",
    "sklearn.metrics.pairwise.polynomial_kernel",
    "sklearn.metrics.pairwise.rbf_kernel",
    "sklearn.metrics.pairwise.sigmoid_kernel",
    "sklearn.metrics.pairwise_distances",
    "sklearn.metrics.pairwise_distances_argmin",
    "sklearn.metrics.pairwise_distances_chunked",
    "sklearn.metrics.precision_recall_curve",
    "sklearn.metrics.precision_recall_fscore_support",
    "sklearn.metrics.precision_score",
    "sklearn.metrics.r2_score",
    "sklearn.metrics.rand_score",
    "sklearn.metrics.recall_score",
    "sklearn.metrics.roc_auc_score",
    "sklearn.metrics.roc_curve",
    "sklearn.metrics.root_mean_squared_error",
    "sklearn.metrics.root_mean_squared_log_error",
    "sklearn.metrics.top_k_accuracy_score",
    "sklearn.metrics.v_measure_score",
    "sklearn.metrics.zero_one_loss",
    "sklearn.model_selection.cross_val_predict",
    "sklearn.model_selection.cross_val_score",
    "sklearn.model_selection.cross_validate",
    "sklearn.model_selection.learning_curve",
    "sklearn.model_selection.permutation_test_score",
    "sklearn.model_selection.train_test_split",
    "sklearn.model_selection.validation_curve",
    "sklearn.neighbors.kneighbors_graph",
    "sklearn.neighbors.radius_neighbors_graph",
    "sklearn.neighbors.sort_graph_by_row_values",
    "sklearn.preprocessing.add_dummy_feature",
    "sklearn.preprocessing.binarize",
    "sklearn.preprocessing.label_binarize",
    "sklearn.preprocessing.normalize",
    "sklearn.preprocessing.scale",
    "sklearn.random_projection.johnson_lindenstrauss_min_dim",
    "sklearn.svm.l1_min_c",
    "sklearn.tree.export_graphviz",
    "sklearn.tree.export_text",
    "sklearn.tree.plot_tree",
    "sklearn.utils.gen_batches",
    "sklearn.utils.gen_even_slices",
    "sklearn.utils.resample",
    "sklearn.utils.safe_mask",
    "sklearn.utils.extmath.randomized_svd",
    "sklearn.utils.class_weight.compute_class_weight",
    "sklearn.utils.class_weight.compute_sample_weight",
    "sklearn.utils.graph.single_source_shortest_path_length",
]
@pytest.mark.parametrize("func_module", PARAM_VALIDATION_FUNCTION_LIST)
def test_function_param_validation(func_module):
    """Check param validation for public functions that are not wrappers around
    estimators.
    """
    func, name, params, required = _get_func_info(func_module)
    # Constraints are attached to the function object by @validate_params.
    constraints = func._skl_parameter_constraints
    _check_function_param_validation(func, name, params, required, constraints)
# Pairs of (wrapper function path, wrapped estimator class path); the wrapper's
# constraints are merged with the class's in `test_class_wrapper_param_validation`.
PARAM_VALIDATION_CLASS_WRAPPER_LIST = [
    ("sklearn.cluster.affinity_propagation", "sklearn.cluster.AffinityPropagation"),
    ("sklearn.cluster.dbscan", "sklearn.cluster.DBSCAN"),
    ("sklearn.cluster.k_means", "sklearn.cluster.KMeans"),
    ("sklearn.cluster.mean_shift", "sklearn.cluster.MeanShift"),
    ("sklearn.cluster.spectral_clustering", "sklearn.cluster.SpectralClustering"),
    ("sklearn.covariance.graphical_lasso", "sklearn.covariance.GraphicalLasso"),
    ("sklearn.covariance.ledoit_wolf", "sklearn.covariance.LedoitWolf"),
    ("sklearn.covariance.oas", "sklearn.covariance.OAS"),
    ("sklearn.decomposition.dict_learning", "sklearn.decomposition.DictionaryLearning"),
    (
        "sklearn.decomposition.dict_learning_online",
        "sklearn.decomposition.MiniBatchDictionaryLearning",
    ),
    ("sklearn.decomposition.fastica", "sklearn.decomposition.FastICA"),
    ("sklearn.decomposition.non_negative_factorization", "sklearn.decomposition.NMF"),
    ("sklearn.preprocessing.maxabs_scale", "sklearn.preprocessing.MaxAbsScaler"),
    ("sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.MinMaxScaler"),
    ("sklearn.preprocessing.power_transform", "sklearn.preprocessing.PowerTransformer"),
    (
        "sklearn.preprocessing.quantile_transform",
        "sklearn.preprocessing.QuantileTransformer",
    ),
    ("sklearn.preprocessing.robust_scale", "sklearn.preprocessing.RobustScaler"),
]
@pytest.mark.parametrize(
    "func_module, class_module", PARAM_VALIDATION_CLASS_WRAPPER_LIST
)
def test_class_wrapper_param_validation(func_module, class_module):
    """Check param validation for public functions that are wrappers around
    estimators.
    """
    func, func_name, func_params, required_params = _get_func_info(func_module)

    # Resolve the wrapped estimator class from its dotted path.
    module_name, class_name = class_module.rsplit(".", 1)
    klass = getattr(import_module(module_name), class_name)

    # Merge the constraints, giving precedence to the function's own
    # declarations over the class-level ones.
    merged = dict(klass._parameter_constraints)
    merged.update(func._skl_parameter_constraints)

    # Only keep constraints for parameters the wrapper actually exposes.
    parameter_constraints = {
        name: spec for name, spec in merged.items() if name in func_params
    }

    _check_function_param_validation(
        func, func_name, func_params, required_params, parameter_constraints
    )
@@ -0,0 +1,584 @@
import functools
import warnings
from typing import Any, List
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.exceptions import DataDimensionalityWarning, NotFittedError
from sklearn.metrics import euclidean_distances
from sklearn.random_projection import (
GaussianRandomProjection,
SparseRandomProjection,
_gaussian_random_matrix,
_sparse_random_matrix,
johnson_lindenstrauss_min_dim,
)
from sklearn.utils._testing import (
assert_allclose,
assert_allclose_dense_sparse,
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import COO_CONTAINERS
# Families of matrix generators / transformer classes under test, grouped by
# whether they produce sparse or dense matrices; the `all_*` concatenations
# are used to parametrize the shared tests over both families.
all_sparse_random_matrix: List[Any] = [_sparse_random_matrix]
all_dense_random_matrix: List[Any] = [_gaussian_random_matrix]
all_random_matrix = all_sparse_random_matrix + all_dense_random_matrix

all_SparseRandomProjection: List[Any] = [SparseRandomProjection]
all_DenseRandomProjection: List[Any] = [GaussianRandomProjection]
all_RandomProjection = all_SparseRandomProjection + all_DenseRandomProjection
def make_sparse_random_data(
    coo_container,
    n_samples,
    n_features,
    n_nonzeros,
    random_state=None,
    sparse_format="csr",
):
    """Make some random data with uniformly located non zero entries with
    Gaussian distributed values; `sparse_format` can be `"csr"` (default) or
    `None` (in which case a dense array is returned).
    """
    rng = np.random.RandomState(random_state)
    # Draw the Gaussian values first, then the row/column coordinates, so the
    # RNG consumption order stays reproducible for a given seed.
    values = rng.randn(n_nonzeros)
    rows = rng.randint(n_samples, size=n_nonzeros)
    cols = rng.randint(n_features, size=n_nonzeros)
    data_coo = coo_container(
        (values, (rows, cols)), shape=(n_samples, n_features)
    )
    if sparse_format is None:
        return data_coo.toarray()
    return data_coo.asformat(sparse_format)
def densify(matrix):
    """Return `matrix` as a dense ndarray, converting it only if it is sparse."""
    return matrix.toarray() if sp.issparse(matrix) else matrix
# Default data dimensions shared by several tests below; n_nonzeros gives
# roughly 1% density.
n_samples, n_features = (10, 1000)
n_nonzeros = int(n_samples * n_features / 100.0)
###############################################################################
# test on JL lemma
###############################################################################
@pytest.mark.parametrize(
    "n_samples, eps",
    [
        ([100, 110], [0.9, 1.1]),
        ([90, 100], [0.1, 0.0]),
        ([50, -40], [0.1, 0.2]),
    ],
)
def test_invalid_jl_domain(n_samples, eps):
    """Out-of-domain inputs (eps outside (0, 1) or non-positive n_samples)
    must raise a ValueError."""
    with pytest.raises(ValueError):
        johnson_lindenstrauss_min_dim(n_samples, eps=eps)
def test_input_size_jl_min_dim():
    """Mismatched n_samples/eps lengths raise; same-shape arrays are accepted."""
    # Lengths 3 and 2 cannot be broadcast together.
    with pytest.raises(ValueError):
        johnson_lindenstrauss_min_dim([100] * 3, eps=[0.9] * 2)
    # Matching 2D shapes broadcast fine and must not raise.
    johnson_lindenstrauss_min_dim(
        np.random.randint(1, 10, size=(10, 10)), eps=np.full((10, 10), 0.5)
    )
###############################################################################
# tests random matrix generation
###############################################################################
def check_input_size_random_matrix(random_matrix):
    """Non-positive matrix dimensions must be rejected with a ValueError."""
    bad_shapes = [(0, 0), (-1, 1), (1, -1), (1, 0), (-1, 0)]
    for n_components, n_features in bad_shapes:
        with pytest.raises(ValueError):
            random_matrix(n_components, n_features)
def check_size_generated(random_matrix):
    """The generated matrix must have exactly the requested shape."""
    for shape in [(1, 5), (5, 1), (5, 5), (1, 1)]:
        assert random_matrix(*shape).shape == shape
def check_zero_mean_and_unit_norm(random_matrix):
    """A single generated column of 10000 entries must have approximately
    zero mean and unit Euclidean norm."""
    column = densify(random_matrix(10000, 1, random_state=0))
    assert_array_almost_equal(0, np.mean(column), 3)
    assert_array_almost_equal(1.0, np.linalg.norm(column), 1)
def check_input_with_sparse_random_matrix(random_matrix):
    """Densities outside the valid (0, 1] range must raise a ValueError."""
    n_components, n_features = 5, 10
    for bad_density in (-1.0, 0.0, 1.1):
        with pytest.raises(ValueError):
            random_matrix(n_components, n_features, density=bad_density)
@pytest.mark.parametrize("random_matrix", all_random_matrix)
def test_basic_property_of_random_matrix(random_matrix):
    """Run the shared sanity checks on every (sparse and dense) generator."""
    # Check basic properties of random matrix generation
    check_input_size_random_matrix(random_matrix)
    check_size_generated(random_matrix)
    check_zero_mean_and_unit_norm(random_matrix)
@pytest.mark.parametrize("random_matrix", all_sparse_random_matrix)
def test_basic_property_of_sparse_random_matrix(random_matrix):
    """Sparse generators validate density and, at density=1, satisfy the same
    statistical contract as the dense generators."""
    check_input_with_sparse_random_matrix(random_matrix)
    # Pin density to 1.0 so the sparse generator can be checked against the
    # zero-mean / unit-norm property shared with dense generators.
    dense_variant = functools.partial(random_matrix, density=1.0)
    check_zero_mean_and_unit_norm(dense_variant)
def test_gaussian_random_matrix():
    """Entries of the Gaussian random matrix are drawn from N(0, 1 / n_components)."""
    n_components, n_features = 100, 1000
    A = _gaussian_random_matrix(n_components, n_features, random_state=0)
    # With 100 * 1000 samples the empirical mean and variance should be close
    # to the target distribution's parameters.
    assert_array_almost_equal(0.0, np.mean(A), 2)
    assert_array_almost_equal(np.var(A, ddof=1), 1 / n_components, 1)
def test_sparse_random_matrix():
    """Check the value levels and empirical frequencies of the sparse matrix."""
    # Check some statical properties of sparse random matrix
    n_components = 100
    n_features = 500
    for density in [0.3, 1.0]:
        s = 1 / density
        A = _sparse_random_matrix(
            n_components, n_features, density=density, random_state=0
        )
        A = densify(A)
        # Check possible values
        values = np.unique(A)
        assert np.sqrt(s) / np.sqrt(n_components) in values
        assert -np.sqrt(s) / np.sqrt(n_components) in values
        if density == 1.0:
            # Fully dense: only the two non-zero levels can occur.
            assert np.size(values) == 2
        else:
            assert 0.0 in values
            assert np.size(values) == 3
        # Check that the random matrix follow the proper distribution.
        # Let's say that each element of a_{ij} of A is taken from
        #
        # - -sqrt(s) / sqrt(n_components) with probability 1 / 2s
        # - 0 with probability 1 - 1 / s
        # - +sqrt(s) / sqrt(n_components) with probability 1 / 2s
        #
        assert_almost_equal(np.mean(A == 0.0), 1 - 1 / s, decimal=2)
        assert_almost_equal(
            np.mean(A == np.sqrt(s) / np.sqrt(n_components)), 1 / (2 * s), decimal=2
        )
        assert_almost_equal(
            np.mean(A == -np.sqrt(s) / np.sqrt(n_components)), 1 / (2 * s), decimal=2
        )
        # Each indicator (A == level) is Bernoulli, so its variance is p*(1-p).
        assert_almost_equal(np.var(A == 0.0, ddof=1), (1 - 1 / s) * 1 / s, decimal=2)
        assert_almost_equal(
            np.var(A == np.sqrt(s) / np.sqrt(n_components), ddof=1),
            (1 - 1 / (2 * s)) * 1 / (2 * s),
            decimal=2,
        )
        assert_almost_equal(
            np.var(A == -np.sqrt(s) / np.sqrt(n_components), ddof=1),
            (1 - 1 / (2 * s)) * 1 / (2 * s),
            decimal=2,
        )
###############################################################################
# tests on random projection transformer
###############################################################################
def test_random_projection_transformer_invalid_input():
    """Fitting with n_components='auto' on a tiny dataset must raise ValueError."""
    fit_data = [[0, 1, 2]]
    for RandomProjection in all_RandomProjection:
        with pytest.raises(ValueError):
            RandomProjection(n_components="auto").fit(fit_data)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_try_to_transform_before_fit(coo_container, global_random_seed):
    """Calling transform on an unfitted transformer raises NotFittedError."""
    data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=global_random_seed,
        sparse_format=None,
    )
    for RandomProjection in all_RandomProjection:
        unfitted = RandomProjection(n_components="auto")
        with pytest.raises(NotFittedError):
            unfitted.transform(data)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_too_many_samples_to_find_a_safe_embedding(coo_container, global_random_seed):
    """fit() errors out when the JL target dimension exceeds n_features."""
    data = make_sparse_random_data(
        coo_container,
        n_samples=1000,
        n_features=100,
        n_nonzeros=1000,
        random_state=global_random_seed,
        sparse_format=None,
    )
    # For eps=0.1 and 1000 samples the JL lemma asks for 5920 dimensions,
    # far more than the 100 original features.
    expected_msg = (
        "eps=0.100000 and n_samples=1000 lead to a target dimension"
        " of 5920 which is larger than the original space with"
        " n_features=100"
    )
    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components="auto", eps=0.1)
        with pytest.raises(ValueError, match=expected_msg):
            rp.fit(data)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_random_projection_embedding_quality(coo_container):
    """Automatically tuned projections must preserve pairwise distances
    within the 1 +/- eps bounds of the Johnson-Lindenstrauss lemma."""
    data = make_sparse_random_data(
        coo_container,
        n_samples=8,
        n_features=5000,
        n_nonzeros=15000,
        random_state=0,
        sparse_format=None,
    )
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True).ravel()
    non_identical = original_distances != 0.0
    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components="auto", eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True).ravel()
        # keep the same entries that were kept for the original distances
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances
        # check that the automatically tuned values for the density respect the
        # contract for eps: pairwise distances are preserved according to the
        # Johnson-Lindenstrauss lemma
        assert distances_ratio.max() < 1 + eps
        assert 1 - eps < distances_ratio.min()
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_SparseRandomProj_output_representation(coo_container):
    """`dense_output` controls whether sparse input yields dense or sparse output."""
    dense_data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=0,
        sparse_format=None,
    )
    sparse_data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=0,
        sparse_format="csr",
    )
    for SparseRandomProj in all_SparseRandomProjection:
        # dense_output=True forces a dense ndarray even for sparse input.
        forced_dense = SparseRandomProj(
            n_components=10, dense_output=True, random_state=0
        )
        forced_dense.fit(dense_data)
        assert isinstance(forced_dense.transform(dense_data), np.ndarray)
        assert isinstance(forced_dense.transform(sparse_data), np.ndarray)

        # dense_output=False keeps the output format tied to the input format.
        keep_format = SparseRandomProj(
            n_components=10, dense_output=False, random_state=0
        ).fit(dense_data)
        # output for dense input will stay dense:
        assert isinstance(keep_format.transform(dense_data), np.ndarray)
        # output for sparse input will be sparse:
        assert sp.issparse(keep_format.transform(sparse_data))
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_correct_RandomProjection_dimensions_embedding(
    coo_container, global_random_seed
):
    """`n_components="auto"` infers the JL dimension at fit time; fixed
    n_components/density settings are honored as given."""
    data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=global_random_seed,
        sparse_format=None,
    )
    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components="auto", random_state=0, eps=0.5).fit(data)
        # the number of components is adjusted from the shape of the training
        # set; the constructor parameter itself is left untouched
        assert rp.n_components == "auto"
        assert rp.n_components_ == 110
        if RandomProjection in all_SparseRandomProjection:
            assert rp.density == "auto"
            assert_almost_equal(rp.density_, 0.03, 2)
        assert rp.components_.shape == (110, n_features)
        projected_1 = rp.transform(data)
        assert projected_1.shape == (n_samples, 110)
        # once the RP is 'fitted' the projection is always the same
        projected_2 = rp.transform(data)
        assert_array_equal(projected_1, projected_2)
        # fit transform with same random seed will lead to the same results
        rp2 = RandomProjection(random_state=0, eps=0.5)
        projected_3 = rp2.fit_transform(data)
        assert_array_equal(projected_1, projected_3)
        # Try to transform with an input X of size different from fitted.
        with pytest.raises(ValueError):
            rp.transform(data[:, 1:5])
        # it is also possible to fix the number of components and the density
        # level
        if RandomProjection in all_SparseRandomProjection:
            rp = RandomProjection(n_components=100, density=0.001, random_state=0)
            projected = rp.fit_transform(data)
            assert projected.shape == (n_samples, 100)
            assert rp.components_.shape == (100, n_features)
            assert rp.components_.nnz < 115  # close to 1% density
            assert 85 < rp.components_.nnz  # close to 1% density
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_warning_n_components_greater_than_n_features(
    coo_container, global_random_seed
):
    """Requesting more components than features triggers DataDimensionalityWarning."""
    n_features, n_samples = 20, 5
    n_nonzeros = n_features // 4
    data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=global_random_seed,
        sparse_format=None,
    )
    for RandomProjection in all_RandomProjection:
        with pytest.warns(DataDimensionalityWarning):
            RandomProjection(n_components=n_features + 1).fit(data)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_works_with_sparse_data(coo_container, global_random_seed):
    """Fitting on sparse input yields the same components as fitting on dense."""
    n_features, n_samples = 20, 5
    n_nonzeros = n_features // 4
    # Same seed and dimensions in both calls, so both datasets hold the same
    # underlying values, only in different container formats.
    make = functools.partial(
        make_sparse_random_data,
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=global_random_seed,
    )
    dense_data = make(sparse_format=None)
    sparse_data = make(sparse_format="csr")
    for RandomProjection in all_RandomProjection:
        rp_dense = RandomProjection(n_components=3, random_state=1).fit(dense_data)
        rp_sparse = RandomProjection(n_components=3, random_state=1).fit(sparse_data)
        assert_array_almost_equal(
            densify(rp_dense.components_), densify(rp_sparse.components_)
        )
def test_johnson_lindenstrauss_min_dim():
    """Test Johnson-Lindenstrauss for small eps.

    Regression test for #17111: before #19374, 32-bit systems would fail.
    """
    # The exact expected value exceeds 32 bits, pinning down the integer
    # arithmetic that used to overflow.
    assert johnson_lindenstrauss_min_dim(100, eps=1e-5) == 368416070986
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
def test_random_projection_feature_names_out(
    coo_container, random_projection_cls, global_random_seed
):
    """get_feature_names_out yields '<lowercased class name><i>' per component."""
    data = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros,
        random_state=global_random_seed,
        sparse_format=None,
    )
    transformer = random_projection_cls(n_components=2)
    transformer.fit(data)

    prefix = random_projection_cls.__name__.lower()
    expected_names_out = np.array(
        [f"{prefix}{i}" for i in range(transformer.n_components_)],
        dtype=object,
    )
    assert_array_equal(transformer.get_feature_names_out(), expected_names_out)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
@pytest.mark.parametrize("n_samples", (2, 9, 10, 11, 1000))
@pytest.mark.parametrize("n_features", (2, 9, 10, 11, 1000))
@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
@pytest.mark.parametrize("compute_inverse_components", [True, False])
def test_inverse_transform(
    coo_container,
    n_samples,
    n_features,
    random_projection_cls,
    compute_inverse_components,
    global_random_seed,
):
    """Round-trip X -> transform -> inverse_transform -> transform is stable,
    for dense and CSR inputs, with and without precomputed inverse components."""
    n_components = 10
    random_projection = random_projection_cls(
        n_components=n_components,
        compute_inverse_components=compute_inverse_components,
        random_state=global_random_seed,
    )
    X_dense = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros=n_samples * n_features // 100 + 1,
        random_state=global_random_seed,
        sparse_format=None,
    )
    X_csr = make_sparse_random_data(
        coo_container,
        n_samples,
        n_features,
        n_nonzeros=n_samples * n_features // 100 + 1,
        random_state=global_random_seed,
        sparse_format="csr",
    )
    for X in [X_dense, X_csr]:
        # Some parametrizations use fewer features than components; the
        # resulting warning is irrelevant to what is tested here.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=(
                    "The number of components is higher than the number of features"
                ),
                category=DataDimensionalityWarning,
            )
            projected = random_projection.fit_transform(X)
        if compute_inverse_components:
            assert hasattr(random_projection, "inverse_components_")
            inv_components = random_projection.inverse_components_
            assert inv_components.shape == (n_features, n_components)
        projected_back = random_projection.inverse_transform(projected)
        assert projected_back.shape == X.shape
        projected_again = random_projection.transform(projected_back)
        # Densify before comparing, since `projected` may be sparse.
        if hasattr(projected, "toarray"):
            projected = projected.toarray()
        assert_allclose(projected, projected_again, rtol=1e-7, atol=1e-10)
@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
@pytest.mark.parametrize(
    "input_dtype, expected_dtype",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_random_projection_dtype_match(
    random_projection_cls, input_dtype, expected_dtype
):
    """Components and output dtype follow the input dtype; ints upcast to float64."""
    X = np.random.RandomState(42).rand(25, 3000)
    transformer = random_projection_cls(random_state=0)
    projected = transformer.fit_transform(X.astype(input_dtype))

    assert transformer.components_.dtype == expected_dtype
    assert projected.dtype == expected_dtype
@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
def test_random_projection_numerical_consistency(random_projection_cls):
    """float32 and float64 fits agree within a small absolute tolerance."""
    atol = 1e-5
    X = np.random.RandomState(42).rand(25, 3000)

    # Identical seeds so both transformers draw the same random matrix.
    rp_32 = random_projection_cls(random_state=0)
    rp_64 = random_projection_cls(random_state=0)
    projection_32 = rp_32.fit_transform(X.astype(np.float32))
    projection_64 = rp_64.fit_transform(X.astype(np.float64))

    assert_allclose(projection_64, projection_32, atol=atol)
    assert_allclose_dense_sparse(rp_32.components_, rp_64.components_)