feat: initial commit - Phase 1 & 2 core features

Commit: 1773bda06b
Author: hiderfong
Date:   2026-04-22 17:07:33 +08:00

25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,238 @@
"""test the label propagation module"""
import warnings
import numpy as np
import pytest
from scipy.sparse import issparse
from sklearn.datasets import make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.semi_supervised import _label_propagation as label_propagation
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
CONSTRUCTOR_TYPES = ("array", "sparse_csr", "sparse_csc")
ESTIMATORS = [
(label_propagation.LabelPropagation, {"kernel": "rbf"}),
(label_propagation.LabelPropagation, {"kernel": "knn", "n_neighbors": 2}),
(
label_propagation.LabelPropagation,
{"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)},
),
(label_propagation.LabelSpreading, {"kernel": "rbf"}),
(label_propagation.LabelSpreading, {"kernel": "knn", "n_neighbors": 2}),
(
label_propagation.LabelSpreading,
{"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)},
),
]
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_fit_transduction(global_dtype, Estimator, parameters):
samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype)
labels = [0, 1, -1]
clf = Estimator(**parameters).fit(samples, labels)
assert clf.transduction_[2] == 1
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_distribution(global_dtype, Estimator, parameters):
if parameters["kernel"] == "knn":
pytest.skip(
"Unstable test for this configuration: changes in k-NN ordering break it."
)
samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=global_dtype)
labels = [0, 1, -1]
clf = Estimator(**parameters).fit(samples, labels)
assert_allclose(clf.label_distributions_[2], [0.5, 0.5], atol=1e-2)
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_predict(global_dtype, Estimator, parameters):
samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype)
labels = [0, 1, -1]
clf = Estimator(**parameters).fit(samples, labels)
assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1]))
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_predict_proba(global_dtype, Estimator, parameters):
samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], dtype=global_dtype)
labels = [0, 1, -1]
clf = Estimator(**parameters).fit(samples, labels)
assert_allclose(clf.predict_proba([[1.0, 1.0]]), np.array([[0.5, 0.5]]))
@pytest.mark.parametrize("alpha", [0.1, 0.3, 0.5, 0.7, 0.9])
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_label_spreading_closed_form(global_dtype, Estimator, parameters, alpha):
n_classes = 2
X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0)
X = X.astype(global_dtype, copy=False)
y[::3] = -1
gamma = 0.1
clf = label_propagation.LabelSpreading(gamma=gamma).fit(X, y)
# adopting notation from Zhou et al (2004):
S = clf._build_graph()
Y = np.zeros((len(y), n_classes + 1), dtype=X.dtype)
Y[np.arange(len(y)), y] = 1
Y = Y[:, :-1]
expected = np.dot(np.linalg.inv(np.eye(len(S), dtype=S.dtype) - alpha * S), Y)
expected /= expected.sum(axis=1)[:, np.newaxis]
clf = label_propagation.LabelSpreading(
max_iter=100, alpha=alpha, tol=1e-10, gamma=gamma
)
clf.fit(X, y)
assert_allclose(expected, clf.label_distributions_)
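
def _label_spreading_fixed_point_sketch(S, Y, alpha, n_iter=500):
    # Illustrative sketch (an addition, not part of the upstream test file): the
    # closed form checked above, (I - alpha * S)^-1 @ Y followed by row
    # normalization, is the fixed point of the LabelSpreading update from
    # Zhou et al (2004), F <- alpha * S @ F + (1 - alpha) * Y. Running the
    # iteration long enough (a dense S is assumed here) reproduces the same
    # distributions up to the final row normalization.
    F = np.zeros_like(Y)
    for _ in range(n_iter):
        F = alpha * (S @ F) + (1 - alpha) * Y
    return F / F.sum(axis=1)[:, np.newaxis]
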
def test_label_propagation_closed_form(global_dtype):
n_classes = 2
X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0)
X = X.astype(global_dtype, copy=False)
y[::3] = -1
Y = np.zeros((len(y), n_classes + 1))
Y[np.arange(len(y)), y] = 1
unlabelled_idx = Y[:, (-1,)].nonzero()[0]
labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0]
clf = label_propagation.LabelPropagation(max_iter=100, tol=1e-10, gamma=0.1)
clf.fit(X, y)
    # adopting notation from Zhu et al (2002):
T_bar = clf._build_graph()
Tuu = T_bar[tuple(np.meshgrid(unlabelled_idx, unlabelled_idx, indexing="ij"))]
Tul = T_bar[tuple(np.meshgrid(unlabelled_idx, labelled_idx, indexing="ij"))]
Y = Y[:, :-1]
Y_l = Y[labelled_idx, :]
Y_u = np.dot(np.dot(np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu), Tul), Y_l)
expected = Y.copy()
expected[unlabelled_idx, :] = Y_u
expected /= expected.sum(axis=1)[:, np.newaxis]
assert_allclose(expected, clf.label_distributions_, atol=1e-4)
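
def _harmonic_iteration_sketch(T_bar, Y_init, labelled_idx, n_iter=1000):
    # Illustrative sketch (an addition, not part of the upstream test file): the
    # closed form above, Y_u = (I - T_uu)^-1 @ T_ul @ Y_l, is the limit of the
    # Zhu et al (2002) "propagate, then clamp" iteration: multiply by the
    # transition matrix T_bar, then reset the labeled rows to their one-hot
    # values. Assumes every unlabeled point is connected to at least one labeled
    # point so that the final row normalization is well defined.
    Y = Y_init.copy()
    for _ in range(n_iter):
        Y = T_bar @ Y
        Y[labelled_idx] = Y_init[labelled_idx]
    return Y / Y.sum(axis=1)[:, np.newaxis]
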
@pytest.mark.parametrize("accepted_sparse_type", ["sparse_csr", "sparse_csc"])
@pytest.mark.parametrize("index_dtype", [np.int32, np.int64])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
def test_sparse_input_types(
accepted_sparse_type, index_dtype, dtype, Estimator, parameters
):
    # This is a non-regression test for #17085
X = _convert_container([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], accepted_sparse_type)
X.data = X.data.astype(dtype, copy=False)
X.indices = X.indices.astype(index_dtype, copy=False)
X.indptr = X.indptr.astype(index_dtype, copy=False)
labels = [0, 1, -1]
clf = Estimator(**parameters).fit(X, labels)
assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1]))
@pytest.mark.parametrize("constructor_type", CONSTRUCTOR_TYPES)
def test_convergence_speed(constructor_type):
# This is a non-regression test for #5774
X = _convert_container([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], constructor_type)
y = np.array([0, 1, -1])
mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=5000)
mdl.fit(X, y)
# this should converge quickly:
assert mdl.n_iter_ < 10
assert_array_equal(mdl.predict(X), [0, 1, 1])
def test_convergence_warning():
# This is a non-regression test for #5774
X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
y = np.array([0, 1, -1])
mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1)
warn_msg = "max_iter=1 was reached without convergence."
with pytest.warns(ConvergenceWarning, match=warn_msg):
mdl.fit(X, y)
assert mdl.n_iter_ == mdl.max_iter
mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1)
with pytest.warns(ConvergenceWarning, match=warn_msg):
mdl.fit(X, y)
assert mdl.n_iter_ == mdl.max_iter
mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500)
with warnings.catch_warnings():
warnings.simplefilter("error", ConvergenceWarning)
mdl.fit(X, y)
mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500)
with warnings.catch_warnings():
warnings.simplefilter("error", ConvergenceWarning)
mdl.fit(X, y)
@pytest.mark.parametrize(
"LabelPropagationCls",
[label_propagation.LabelSpreading, label_propagation.LabelPropagation],
)
def test_label_propagation_non_zero_normalizer(LabelPropagationCls):
# check that we don't divide by zero in case of null normalizer
# non-regression test for
# https://github.com/scikit-learn/scikit-learn/pull/15946
# https://github.com/scikit-learn/scikit-learn/issues/9292
X = np.array([[100.0, 100.0], [100.0, 100.0], [0.0, 0.0], [0.0, 0.0]])
y = np.array([0, 1, -1, -1])
mdl = LabelPropagationCls(kernel="knn", max_iter=100, n_neighbors=1)
with warnings.catch_warnings():
warnings.simplefilter("error", RuntimeWarning)
mdl.fit(X, y)
def test_predict_sparse_callable_kernel(global_dtype):
# This is a non-regression test for #15866
# Custom sparse kernel (top-K RBF)
def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5):
        nn = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean", n_jobs=2)
nn.fit(X)
W = -1 * nn.kneighbors_graph(Y, mode="distance").power(2) * gamma
np.exp(W.data, out=W.data)
assert issparse(W)
return W.T
n_classes = 4
n_samples = 500
n_test = 10
X, y = make_classification(
n_classes=n_classes,
n_samples=n_samples,
n_features=20,
n_informative=20,
n_redundant=0,
n_repeated=0,
random_state=0,
)
X = X.astype(global_dtype)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=n_test, random_state=0
)
model = label_propagation.LabelSpreading(kernel=topk_rbf)
model.fit(X_train, y_train)
assert model.score(X_test, y_test) >= 0.9
model = label_propagation.LabelPropagation(kernel=topk_rbf)
model.fit(X_train, y_train)
assert model.score(X_test, y_test) >= 0.9
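
def _callable_kernel_contract_sketch():
    # Illustrative sketch (an addition, not part of the upstream test file): a
    # callable passed as `kernel` receives two arrays (X, Y) and must return an
    # (n_samples_X, n_samples_Y) affinity matrix, which is why `topk_rbf` above
    # transposes the (n_Y, n_X) k-neighbors graph before returning it. Both dense
    # and sparse return values are accepted.
    X = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]])
    y = [0, 1, -1]
    model = label_propagation.LabelSpreading(
        kernel=lambda a, b: rbf_kernel(a, b, gamma=20)
    )
    return model.fit(X, y).transduction_
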
@@ -0,0 +1,345 @@
from math import ceil
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.datasets import load_iris, make_blobs
from sklearn.ensemble import StackingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Author: Oliver Rausch <rauscho@ethz.ch>
# License: BSD 3 clause
# load the iris dataset and randomly permute it
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
iris.data, iris.target, random_state=0
)
n_labeled_samples = 50
y_train_missing_labels = y_train.copy()
y_train_missing_labels[n_labeled_samples:] = -1
mapping = {0: "A", 1: "B", 2: "C", -1: "-1"}
y_train_missing_strings = np.vectorize(mapping.get)(y_train_missing_labels).astype(
object
)
y_train_missing_strings[y_train_missing_labels == -1] = -1
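
def _self_training_usage_sketch():
    # Illustrative sketch (an addition, not part of the upstream test file): the
    # setup above marks unlabeled rows with -1 (or "-1" for the object-dtype
    # string target). SelfTrainingClassifier repeatedly fits its base estimator
    # on the currently labeled rows, then pseudo-labels the unlabeled rows whose
    # predicted probability clears `threshold`, stopping at `max_iter` or when no
    # new labels can be added.
    clf = SelfTrainingClassifier(SVC(probability=True, gamma="scale"), threshold=0.75)
    clf.fit(X_train, y_train_missing_labels)
    return clf.predict(X_test), clf.transduction_
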
def test_warns_k_best():
st = SelfTrainingClassifier(KNeighborsClassifier(), criterion="k_best", k_best=1000)
with pytest.warns(UserWarning, match="k_best is larger than"):
st.fit(X_train, y_train_missing_labels)
assert st.termination_condition_ == "all_labeled"
@pytest.mark.parametrize(
"base_estimator",
[KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)],
)
@pytest.mark.parametrize("selection_crit", ["threshold", "k_best"])
def test_classification(base_estimator, selection_crit):
# Check classification for various parameter settings.
# Also assert that predictions for strings and numerical labels are equal.
# Also test for multioutput classification
threshold = 0.75
max_iter = 10
st = SelfTrainingClassifier(
base_estimator, max_iter=max_iter, threshold=threshold, criterion=selection_crit
)
st.fit(X_train, y_train_missing_labels)
pred = st.predict(X_test)
proba = st.predict_proba(X_test)
st_string = SelfTrainingClassifier(
base_estimator, max_iter=max_iter, criterion=selection_crit, threshold=threshold
)
st_string.fit(X_train, y_train_missing_strings)
pred_string = st_string.predict(X_test)
proba_string = st_string.predict_proba(X_test)
assert_array_equal(np.vectorize(mapping.get)(pred), pred_string)
assert_array_equal(proba, proba_string)
assert st.termination_condition_ == st_string.termination_condition_
# Check consistency between labeled_iter, n_iter and max_iter
labeled = y_train_missing_labels != -1
# assert that labeled samples have labeled_iter = 0
assert_array_equal(st.labeled_iter_ == 0, labeled)
# assert that labeled samples do not change label during training
assert_array_equal(y_train_missing_labels[labeled], st.transduction_[labeled])
    # assert that no sample was labeled in an iteration later than the last one
    # that was run, and that the number of iterations did not exceed max_iter
assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter
assert np.max(st_string.labeled_iter_) <= st_string.n_iter_ <= max_iter
# check shapes
assert st.labeled_iter_.shape == st.transduction_.shape
assert st_string.labeled_iter_.shape == st_string.transduction_.shape
def test_k_best():
st = SelfTrainingClassifier(
KNeighborsClassifier(n_neighbors=1),
criterion="k_best",
k_best=10,
max_iter=None,
)
y_train_only_one_label = np.copy(y_train)
y_train_only_one_label[1:] = -1
n_samples = y_train.shape[0]
n_expected_iter = ceil((n_samples - 1) / 10)
st.fit(X_train, y_train_only_one_label)
assert st.n_iter_ == n_expected_iter
# Check labeled_iter_
assert np.sum(st.labeled_iter_ == 0) == 1
for i in range(1, n_expected_iter):
assert np.sum(st.labeled_iter_ == i) == 10
assert np.sum(st.labeled_iter_ == n_expected_iter) == (n_samples - 1) % 10
assert st.termination_condition_ == "all_labeled"
def test_sanity_classification():
base_estimator = SVC(gamma="scale", probability=True)
base_estimator.fit(X_train[n_labeled_samples:], y_train[n_labeled_samples:])
st = SelfTrainingClassifier(base_estimator)
st.fit(X_train, y_train_missing_labels)
pred1, pred2 = base_estimator.predict(X_test), st.predict(X_test)
assert not np.array_equal(pred1, pred2)
score_supervised = accuracy_score(base_estimator.predict(X_test), y_test)
score_self_training = accuracy_score(st.predict(X_test), y_test)
assert score_self_training > score_supervised
def test_none_iter():
    # Check that all samples were labeled after a 'reasonable' number of
    # iterations.
st = SelfTrainingClassifier(KNeighborsClassifier(), threshold=0.55, max_iter=None)
st.fit(X_train, y_train_missing_labels)
assert st.n_iter_ < 10
assert st.termination_condition_ == "all_labeled"
@pytest.mark.parametrize(
"base_estimator",
[KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)],
)
@pytest.mark.parametrize("y", [y_train_missing_labels, y_train_missing_strings])
def test_zero_iterations(base_estimator, y):
# Check classification for zero iterations.
# Fitting a SelfTrainingClassifier with zero iterations should give the
# same results as fitting a supervised classifier.
# This also asserts that string arrays work as expected.
clf1 = SelfTrainingClassifier(base_estimator, max_iter=0)
clf1.fit(X_train, y)
clf2 = base_estimator.fit(X_train[:n_labeled_samples], y[:n_labeled_samples])
assert_array_equal(clf1.predict(X_test), clf2.predict(X_test))
assert clf1.termination_condition_ == "max_iter"
def test_prefitted_throws_error():
# Test that passing a pre-fitted classifier and calling predict throws an
# error
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
st = SelfTrainingClassifier(knn)
with pytest.raises(
NotFittedError,
match="This SelfTrainingClassifier instance is not fitted yet",
):
st.predict(X_train)
@pytest.mark.parametrize("max_iter", range(1, 5))
def test_labeled_iter(max_iter):
    # Check that the number of datapoints labeled in iteration 0 equals the
    # number of labeled datapoints we passed in.
st = SelfTrainingClassifier(KNeighborsClassifier(), max_iter=max_iter)
st.fit(X_train, y_train_missing_labels)
amount_iter_0 = len(st.labeled_iter_[st.labeled_iter_ == 0])
assert amount_iter_0 == n_labeled_samples
    # Check that no sample was labeled in an iteration later than the last one
    # that was run, and that the number of iterations did not exceed max_iter
assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter
def test_no_unlabeled():
# Test that training on a fully labeled dataset produces the same results
# as training the classifier by itself.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
st = SelfTrainingClassifier(knn)
with pytest.warns(UserWarning, match="y contains no unlabeled samples"):
st.fit(X_train, y_train)
assert_array_equal(knn.predict(X_test), st.predict(X_test))
# Assert that all samples were labeled in iteration 0 (since there were no
# unlabeled samples).
assert np.all(st.labeled_iter_ == 0)
assert st.termination_condition_ == "all_labeled"
def test_early_stopping():
svc = SVC(gamma="scale", probability=True)
st = SelfTrainingClassifier(svc)
X_train_easy = [[1], [0], [1], [0.5]]
y_train_easy = [1, 0, -1, -1]
    # The unlabeled sample [0.5] cannot be predicted with high confidence, so
    # training stops early
st.fit(X_train_easy, y_train_easy)
assert st.n_iter_ == 1
assert st.termination_condition_ == "no_change"
def test_strings_dtype():
clf = SelfTrainingClassifier(KNeighborsClassifier())
X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
labels_multiclass = ["one", "two", "three"]
y_strings = np.take(labels_multiclass, y)
with pytest.raises(ValueError, match="dtype"):
clf.fit(X, y_strings)
@pytest.mark.parametrize("verbose", [True, False])
def test_verbose(capsys, verbose):
clf = SelfTrainingClassifier(KNeighborsClassifier(), verbose=verbose)
clf.fit(X_train, y_train_missing_labels)
captured = capsys.readouterr()
if verbose:
assert "iteration" in captured.out
else:
assert "iteration" not in captured.out
def test_verbose_k_best(capsys):
st = SelfTrainingClassifier(
KNeighborsClassifier(n_neighbors=1),
criterion="k_best",
k_best=10,
verbose=True,
max_iter=None,
)
y_train_only_one_label = np.copy(y_train)
y_train_only_one_label[1:] = -1
n_samples = y_train.shape[0]
n_expected_iter = ceil((n_samples - 1) / 10)
st.fit(X_train, y_train_only_one_label)
captured = capsys.readouterr()
msg = "End of iteration {}, added {} new labels."
for i in range(1, n_expected_iter):
assert msg.format(i, 10) in captured.out
assert msg.format(n_expected_iter, (n_samples - 1) % 10) in captured.out
def test_k_best_selects_best():
# Tests that the labels added by st really are the 10 best labels.
svc = SVC(gamma="scale", probability=True, random_state=0)
st = SelfTrainingClassifier(svc, criterion="k_best", max_iter=1, k_best=10)
has_label = y_train_missing_labels != -1
st.fit(X_train, y_train_missing_labels)
got_label = ~has_label & (st.transduction_ != -1)
svc.fit(X_train[has_label], y_train_missing_labels[has_label])
pred = svc.predict_proba(X_train[~has_label])
max_proba = np.max(pred, axis=1)
most_confident_svc = X_train[~has_label][np.argsort(max_proba)[-10:]]
added_by_st = X_train[np.where(got_label)].tolist()
for row in most_confident_svc.tolist():
assert row in added_by_st
def test_base_estimator_meta_estimator():
# Check that a meta-estimator relying on an estimator implementing
# `predict_proba` will work even if it does not expose this method before being
# fitted.
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/19119
base_estimator = StackingClassifier(
estimators=[
("svc_1", SVC(probability=True)),
("svc_2", SVC(probability=True)),
],
final_estimator=SVC(probability=True),
cv=2,
)
assert hasattr(base_estimator, "predict_proba")
clf = SelfTrainingClassifier(base_estimator=base_estimator)
clf.fit(X_train, y_train_missing_labels)
clf.predict_proba(X_test)
base_estimator = StackingClassifier(
estimators=[
("svc_1", SVC(probability=False)),
("svc_2", SVC(probability=False)),
],
final_estimator=SVC(probability=False),
cv=2,
)
assert not hasattr(base_estimator, "predict_proba")
clf = SelfTrainingClassifier(base_estimator=base_estimator)
with pytest.raises(AttributeError):
clf.fit(X_train, y_train_missing_labels)
def test_self_training_estimator_attribute_error():
"""Check that we raise the proper AttributeErrors when the `base_estimator`
does not implement the `predict_proba` method, which is called from within
`fit`, or `decision_function`, which is decorated with `available_if`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/28108
"""
    # `SVC` with `probability=False` does not implement 'predict_proba', which
    # is required internally in `fit` of `SelfTrainingClassifier`. We expect
    # an AttributeError to be raised.
base_estimator = SVC(probability=False, gamma="scale")
self_training = SelfTrainingClassifier(base_estimator)
with pytest.raises(AttributeError, match="has no attribute 'predict_proba'"):
self_training.fit(X_train, y_train_missing_labels)
# `DecisionTreeClassifier` does not implement 'decision_function' and
# should raise an AttributeError
self_training = SelfTrainingClassifier(base_estimator=DecisionTreeClassifier())
outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'"
inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'"
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
self_training.fit(X_train, y_train_missing_labels).decision_function(X_train)
assert isinstance(exec_info.value.__cause__, AttributeError)
assert inner_msg in str(exec_info.value.__cause__)
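
def _delegated_decision_function_sketch():
    # Illustrative sketch (an addition, not part of the upstream test file):
    # `decision_function` is only exposed on SelfTrainingClassifier when the
    # fitted base estimator provides it (see the docstring above), so swapping
    # the DecisionTreeClassifier for a probabilistic SVC makes the same call
    # succeed instead of raising AttributeError.
    st = SelfTrainingClassifier(base_estimator=SVC(gamma="scale", probability=True))
    st.fit(X_train, y_train_missing_labels)
    return st.decision_function(X_test)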