feat: initial commit - Phase 1 & 2 core features

2026-04-22 17:07:33 +08:00
commit 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,7 @@
+"""
+The :mod:`sklearn.experimental` module provides importable modules that enable
+the use of experimental features or estimators.
+
+The features and estimators that are experimental aren't subject to
+deprecation cycles. Use them at your own risk!
+"""
@@ -0,0 +1,32 @@
+"""Enables Successive Halving search-estimators
+
+The API and results of these estimators might change without any deprecation
+cycle.
+
+Importing this file dynamically sets the
+:class:`~sklearn.model_selection.HalvingRandomSearchCV` and
+:class:`~sklearn.model_selection.HalvingGridSearchCV` as attributes of the
+`model_selection` module::
+
+    >>> # explicitly require this experimental feature
+    >>> from sklearn.experimental import enable_halving_search_cv  # noqa
+    >>> # now you can import normally from model_selection
+    >>> from sklearn.model_selection import HalvingRandomSearchCV
+    >>> from sklearn.model_selection import HalvingGridSearchCV
+
+
+The ``# noqa`` comment can be removed: it just tells linters like
+flake8 to ignore the import, which appears as unused.
+"""
+
+from .. import model_selection
+from ..model_selection._search_successive_halving import (
+    HalvingGridSearchCV,
+    HalvingRandomSearchCV,
+)
+
+# use setattr to avoid mypy errors when monkeypatching
+setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV)
+setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV)
+
+model_selection.__all__ += ["HalvingRandomSearchCV", "HalvingGridSearchCV"]
@@ -0,0 +1,21 @@
+"""This is now a no-op and can be safely removed from your code.
+
+It used to enable the use of
+:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
+:class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still
+:term:`experimental`, but these estimators are now stable and can be imported
+normally from `sklearn.ensemble`.
+"""
+
+# Don't remove this file, we don't want to break users' code just because the
+# feature isn't experimental anymore.
+
+
+import warnings
+
+warnings.warn(
+    "Since version 1.0, "
+    "it is not needed to import enable_hist_gradient_boosting anymore. "
+    "HistGradientBoostingClassifier and HistGradientBoostingRegressor are now "
+    "stable and can be normally imported from sklearn.ensemble."
+)
@@ -0,0 +1,20 @@
+"""Enables IterativeImputer
+
+The API and results of this estimator might change without any deprecation
+cycle.
+
+Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer`
+as an attribute of the impute module::
+
+    >>> # explicitly require this experimental feature
+    >>> from sklearn.experimental import enable_iterative_imputer  # noqa
+    >>> # now you can import normally from impute
+    >>> from sklearn.impute import IterativeImputer
+"""
+
+from .. import impute
+from ..impute._iterative import IterativeImputer
+
+# use setattr to avoid mypy errors when monkeypatching
+setattr(impute, "IterativeImputer", IterativeImputer)
+impute.__all__ += ["IterativeImputer"]
@@ -0,0 +1,19 @@
+"""Tests for making sure experimental imports work as expected."""
+
+import textwrap
+
+import pytest
+
+from sklearn.utils._testing import assert_run_python_script_without_output
+from sklearn.utils.fixes import _IS_WASM
+
+
+@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
+def test_import_raises_warning():
+    code = """
+    import pytest
+    with pytest.warns(UserWarning, match="it is not needed to import"):
+        from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+    """
+    pattern = "it is not needed to import enable_hist_gradient_boosting anymore"
+    assert_run_python_script_without_output(textwrap.dedent(code), pattern=pattern)
@@ -0,0 +1,51 @@
+"""Tests for making sure experimental imports work as expected."""
+
+import textwrap
+
+import pytest
+
+from sklearn.utils._testing import assert_run_python_script_without_output
+from sklearn.utils.fixes import _IS_WASM
+
+
+@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
+def test_imports_strategies():
+    # Make sure different import strategies work or fail as expected.
+
+    # Since Python caches the imported modules, we need to run a child process
+    # for every test case. Else, the tests would not be independent
+    # (manually removing the imports from the cache (sys.modules) is not
+    # recommended and can lead to many complications).
+    pattern = "IterativeImputer is experimental"
+    good_import = """
+    from sklearn.experimental import enable_iterative_imputer
+    from sklearn.impute import IterativeImputer
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(good_import), pattern=pattern
+    )
+
+    good_import_with_ensemble_first = """
+    import sklearn.ensemble
+    from sklearn.experimental import enable_iterative_imputer
+    from sklearn.impute import IterativeImputer
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(good_import_with_ensemble_first),
+        pattern=pattern,
+    )
+
+    bad_imports = f"""
+    import pytest
+
+    with pytest.raises(ImportError, match={pattern!r}):
+        from sklearn.impute import IterativeImputer
+
+    import sklearn.experimental
+    with pytest.raises(ImportError, match={pattern!r}):
+        from sklearn.impute import IterativeImputer
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(bad_imports),
+        pattern=pattern,
+    )
@@ -0,0 +1,53 @@
+"""Tests for making sure experimental imports work as expected."""
+
+import textwrap
+
+import pytest
+
+from sklearn.utils._testing import assert_run_python_script_without_output
+from sklearn.utils.fixes import _IS_WASM
+
+
+@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
+def test_imports_strategies():
+    # Make sure different import strategies work or fail as expected.
+
+    # Since Python caches the imported modules, we need to run a child process
+    # for every test case. Else, the tests would not be independent
+    # (manually removing the imports from the cache (sys.modules) is not
+    # recommended and can lead to many complications).
+    pattern = "Halving(Grid|Random)SearchCV is experimental"
+    good_import = """
+    from sklearn.experimental import enable_halving_search_cv
+    from sklearn.model_selection import HalvingGridSearchCV
+    from sklearn.model_selection import HalvingRandomSearchCV
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(good_import), pattern=pattern
+    )
+
+    good_import_with_model_selection_first = """
+    import sklearn.model_selection
+    from sklearn.experimental import enable_halving_search_cv
+    from sklearn.model_selection import HalvingGridSearchCV
+    from sklearn.model_selection import HalvingRandomSearchCV
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(good_import_with_model_selection_first),
+        pattern=pattern,
+    )
+
+    bad_imports = f"""
+    import pytest
+
+    with pytest.raises(ImportError, match={pattern!r}):
+        from sklearn.model_selection import HalvingGridSearchCV
+
+    import sklearn.experimental
+    with pytest.raises(ImportError, match={pattern!r}):
+        from sklearn.model_selection import HalvingRandomSearchCV
+    """
+    assert_run_python_script_without_output(
+        textwrap.dedent(bad_imports),
+        pattern=pattern,
+    )