feat: initial commit - Phase 1 & 2 core features

This commit is contained in:
hiderfong
2026-04-22 17:07:33 +08:00
commit 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,7 @@
"""
The :mod:`sklearn.experimental` module provides importable modules that enable
the use of experimental features or estimators.
The features and estimators that are experimental aren't subject to
deprecation cycles. Use them at your own risk!
"""
@@ -0,0 +1,32 @@
"""Enables Successive Halving search-estimators
The API and results of these estimators might change without any deprecation
cycle.
Importing this file dynamically sets the
:class:`~sklearn.model_selection.HalvingRandomSearchCV` and
:class:`~sklearn.model_selection.HalvingGridSearchCV` as attributes of the
`model_selection` module::
>>> # explicitly require this experimental feature
>>> from sklearn.experimental import enable_halving_search_cv # noqa
>>> # now you can import normally from model_selection
>>> from sklearn.model_selection import HalvingRandomSearchCV
>>> from sklearn.model_selection import HalvingGridSearchCV
The ``# noqa`` comment can be removed: it just tells linters like
flake8 to ignore the import, which appears as unused.
"""
from .. import model_selection
from ..model_selection._search_successive_halving import (
HalvingGridSearchCV,
HalvingRandomSearchCV,
)
# Attach the two successive-halving estimators to the `model_selection`
# module. setattr is used (rather than plain attribute assignment) to avoid
# mypy errors when monkeypatching.
setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV)
setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV)
# Also append both names to the module's `__all__` list.
model_selection.__all__ += ["HalvingRandomSearchCV", "HalvingGridSearchCV"]
@@ -0,0 +1,21 @@
"""This is now a no-op and can be safely removed from your code.
It used to enable the use of
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still
:term:`experimental`, but these estimators are now stable and can be imported
normally from `sklearn.ensemble`.
"""
# Don't remove this file, we don't want to break users' code just because the
# feature isn't experimental anymore.
import warnings
# Emitted once at import time: tell the user that this enabler module is
# obsolete. The category is spelled out explicitly; UserWarning is also what
# `warnings.warn` uses when no category is given.
warnings.warn(
    message=(
        "Since version 1.0, "
        "it is not needed to import enable_hist_gradient_boosting anymore. "
        "HistGradientBoostingClassifier and HistGradientBoostingRegressor are now "
        "stable and can be normally imported from sklearn.ensemble."
    ),
    category=UserWarning,
)
@@ -0,0 +1,20 @@
"""Enables IterativeImputer
The API and results of this estimator might change without any deprecation
cycle.
Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer`
as an attribute of the impute module::
>>> # explicitly require this experimental feature
>>> from sklearn.experimental import enable_iterative_imputer # noqa
>>> # now you can import normally from impute
>>> from sklearn.impute import IterativeImputer
"""
from .. import impute
from ..impute._iterative import IterativeImputer
# Attach IterativeImputer to the `impute` module. setattr is used (rather
# than plain attribute assignment) to avoid mypy errors when monkeypatching.
setattr(impute, "IterativeImputer", IterativeImputer)
# Also append the name to the module's `__all__` list.
impute.__all__ += ["IterativeImputer"]
@@ -0,0 +1,19 @@
"""Tests for making sure experimental imports work as expected."""
import textwrap
import pytest
from sklearn.utils._testing import assert_run_python_script_without_output
from sklearn.utils.fixes import _IS_WASM
@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
def test_import_raises_warning():
    """Check that importing `enable_hist_gradient_boosting` warns the user.

    The snippet below is handed to `assert_run_python_script_without_output`,
    which (per the xfail reason) needs to start a subprocess; hence the
    expected failure on WASM builds.
    """
    # The body of the `with` block must stay indented relative to `with`:
    # `textwrap.dedent` only strips the indentation common to all lines.
    code = """
    import pytest
    with pytest.warns(UserWarning, match="it is not needed to import"):
        from sklearn.experimental import enable_hist_gradient_boosting # noqa
    """
    # NOTE(review): `pattern` presumably describes output the helper tolerates
    # from the child script -- confirm against the helper's documentation.
    pattern = "it is not needed to import enable_hist_gradient_boosting anymore"
    assert_run_python_script_without_output(textwrap.dedent(code), pattern=pattern)
@@ -0,0 +1,51 @@
"""Tests for making sure experimental imports work as expected."""
import textwrap
import pytest
from sklearn.utils._testing import assert_run_python_script_without_output
from sklearn.utils.fixes import _IS_WASM
@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
def test_imports_strategies():
    """Check which import orders make `IterativeImputer` importable.

    Each scenario is a small script passed to
    `assert_run_python_script_without_output`. Marked as an expected failure
    on WASM, where a subprocess cannot be started.
    """
    # Make sure different import strategies work or fail as expected.

    # Since Python caches the imported modules, we need to run a child process
    # for every test case. Else, the tests would not be independent
    # (manually removing the imports from the cache (sys.modules) is not
    # recommended and can lead to many complications).
    pattern = "IterativeImputer is experimental"

    # Enabling the experimental feature first, then importing the estimator.
    good_import = """
    from sklearn.experimental import enable_iterative_imputer
    from sklearn.impute import IterativeImputer
    """
    assert_run_python_script_without_output(
        textwrap.dedent(good_import), pattern=pattern
    )

    # Same as above, but with another sklearn subpackage imported beforehand.
    good_import_with_ensemble_first = """
    import sklearn.ensemble
    from sklearn.experimental import enable_iterative_imputer
    from sklearn.impute import IterativeImputer
    """
    assert_run_python_script_without_output(
        textwrap.dedent(good_import_with_ensemble_first),
        pattern=pattern,
    )

    # Without the enabler module the import is expected to raise ImportError,
    # even after importing the bare `sklearn.experimental` package. The
    # bodies of the `with` blocks must stay indented for the child script to
    # be valid Python after `textwrap.dedent`.
    bad_imports = f"""
    import pytest
    with pytest.raises(ImportError, match={pattern!r}):
        from sklearn.impute import IterativeImputer
    import sklearn.experimental
    with pytest.raises(ImportError, match={pattern!r}):
        from sklearn.impute import IterativeImputer
    """
    assert_run_python_script_without_output(
        textwrap.dedent(bad_imports),
        pattern=pattern,
    )
@@ -0,0 +1,53 @@
"""Tests for making sure experimental imports work as expected."""
import textwrap
import pytest
from sklearn.utils._testing import assert_run_python_script_without_output
from sklearn.utils.fixes import _IS_WASM
@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
def test_imports_strategies():
    """Check which import orders make the halving search estimators importable.

    Each scenario is a small script passed to
    `assert_run_python_script_without_output`. Marked as an expected failure
    on WASM, where a subprocess cannot be started.
    """
    # Make sure different import strategies work or fail as expected.

    # Since Python caches the imported modules, we need to run a child process
    # for every test case. Else, the tests would not be independent
    # (manually removing the imports from the cache (sys.modules) is not
    # recommended and can lead to many complications).
    pattern = "Halving(Grid|Random)SearchCV is experimental"

    # Enabling the experimental feature first, then importing the estimators.
    good_import = """
    from sklearn.experimental import enable_halving_search_cv
    from sklearn.model_selection import HalvingGridSearchCV
    from sklearn.model_selection import HalvingRandomSearchCV
    """
    assert_run_python_script_without_output(
        textwrap.dedent(good_import), pattern=pattern
    )

    # Same as above, but with `sklearn.model_selection` imported beforehand.
    good_import_with_model_selection_first = """
    import sklearn.model_selection
    from sklearn.experimental import enable_halving_search_cv
    from sklearn.model_selection import HalvingGridSearchCV
    from sklearn.model_selection import HalvingRandomSearchCV
    """
    assert_run_python_script_without_output(
        textwrap.dedent(good_import_with_model_selection_first),
        pattern=pattern,
    )

    # Without the enabler module the imports are expected to raise
    # ImportError, even after importing the bare `sklearn.experimental`
    # package. The bodies of the `with` blocks must stay indented for the
    # child script to be valid Python after `textwrap.dedent`.
    bad_imports = f"""
    import pytest
    with pytest.raises(ImportError, match={pattern!r}):
        from sklearn.model_selection import HalvingGridSearchCV
    import sklearn.experimental
    with pytest.raises(ImportError, match={pattern!r}):
        from sklearn.model_selection import HalvingRandomSearchCV
    """
    assert_run_python_script_without_output(
        textwrap.dedent(bad_imports),
        pattern=pattern,
    )