feat: initial commit - Phase 1 & 2 core features

2026-04-22 17:07:33 +08:00
commit 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,21 @@
+"""
+The :mod:`sklearn.manifold` module implements data embedding techniques.
+"""
+
+from ._isomap import Isomap
+from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding
+from ._mds import MDS, smacof
+from ._spectral_embedding import SpectralEmbedding, spectral_embedding
+from ._t_sne import TSNE, trustworthiness
+
+__all__ = [
+    "locally_linear_embedding",
+    "LocallyLinearEmbedding",
+    "Isomap",
+    "MDS",
+    "smacof",
+    "SpectralEmbedding",
+    "spectral_embedding",
+    "TSNE",
+    "trustworthiness",
+]
@@ -0,0 +1,295 @@
+# Author: Christopher Moody <chrisemoody@gmail.com>
+# Author: Nick Travers <nickt@squareup.com>
+# Implementation by Chris Moody & Nick Travers
+# See http://homepage.tudelft.nl/19j49/t-SNE.html for reference
+# implementations and papers describing the technique
+
+
+import numpy as np
+cimport numpy as cnp
+from libc.stdio cimport printf
+from libc.math cimport log
+from libc.stdlib cimport malloc, free
+from libc.time cimport clock, clock_t
+from cython.parallel cimport prange, parallel
+
+from ..neighbors._quad_tree cimport _QuadTree
+
+cnp.import_array()
+
+
+cdef char* EMPTY_STRING = ""
+
+# Smallest strictly positive value that can be represented by floating
+# point numbers for different precision levels. This is useful to avoid
+# taking the log of zero when computing the KL divergence.
+cdef float FLOAT32_TINY = np.finfo(np.float32).tiny
+
+# Useful to void division by zero or divergence to +inf.
+cdef float FLOAT64_EPS = np.finfo(np.float64).eps
+
+# This is effectively an ifdef statement in Cython
+# It allows us to write printf debugging lines
+# and remove them at compile time
+cdef enum:
+    DEBUGFLAG = 0
+
+cdef float compute_gradient(float[:] val_P,
+                            float[:, :] pos_reference,
+                            cnp.int64_t[:] neighbors,
+                            cnp.int64_t[:] indptr,
+                            float[:, :] tot_force,
+                            _QuadTree qt,
+                            float theta,
+                            int dof,
+                            long start,
+                            bint compute_error,
+                            int num_threads) noexcept nogil:
+    # Having created the tree, calculate the gradient
+    # in two components, the positive and negative forces
+    cdef:
+        long i, coord
+        int ax
+        long n_samples = pos_reference.shape[0]
+        int n_dimensions = qt.n_dimensions
+        clock_t t1 = 0, t2 = 0
+        double sQ
+        float error
+        int take_timing = 1 if qt.verbose > 15 else 0
+
+    if qt.verbose > 11:
+        printf("[t-SNE] Allocating %li elements in force arrays\n",
+               n_samples * n_dimensions * 2)
+    cdef float* neg_f = <float*> malloc(sizeof(float) * n_samples * n_dimensions)
+    cdef float* pos_f = <float*> malloc(sizeof(float) * n_samples * n_dimensions)
+
+    if take_timing:
+        t1 = clock()
+    sQ = compute_gradient_negative(pos_reference, neg_f, qt, dof, theta, start,
+                                   num_threads)
+    if take_timing:
+        t2 = clock()
+        printf("[t-SNE] Computing negative gradient: %e ticks\n", ((float) (t2 - t1)))
+
+    if take_timing:
+        t1 = clock()
+    error = compute_gradient_positive(val_P, pos_reference, neighbors, indptr,
+                                      pos_f, n_dimensions, dof, sQ, start,
+                                      qt.verbose, compute_error, num_threads)
+    if take_timing:
+        t2 = clock()
+        printf("[t-SNE] Computing positive gradient: %e ticks\n",
+               ((float) (t2 - t1)))
+    for i in prange(start, n_samples, nogil=True, num_threads=num_threads,
+                    schedule='static'):
+        for ax in range(n_dimensions):
+            coord = i * n_dimensions + ax
+            tot_force[i, ax] = pos_f[coord] - (neg_f[coord] / sQ)
+
+    free(neg_f)
+    free(pos_f)
+    return error
+
+
+cdef float compute_gradient_positive(float[:] val_P,
+                                     float[:, :] pos_reference,
+                                     cnp.int64_t[:] neighbors,
+                                     cnp.int64_t[:] indptr,
+                                     float* pos_f,
+                                     int n_dimensions,
+                                     int dof,
+                                     double sum_Q,
+                                     cnp.int64_t start,
+                                     int verbose,
+                                     bint compute_error,
+                                     int num_threads) noexcept nogil:
+    # Sum over the following expression for i not equal to j
+    # grad_i = p_ij (1 + ||y_i - y_j||^2)^-1 (y_i - y_j)
+    # This is equivalent to compute_edge_forces in the authors' code
+    # It just goes over the nearest neighbors instead of all the data points
+    # (unlike the non-nearest neighbors version of `compute_gradient_positive')
+    cdef:
+        int ax
+        long i, j, k
+        long n_samples = indptr.shape[0] - 1
+        float C = 0.0
+        float dij, qij, pij
+        float exponent = (dof + 1.0) / 2.0
+        float float_dof = (float) (dof)
+        float* buff
+        clock_t t1 = 0, t2 = 0
+        float dt
+
+    if verbose > 10:
+        t1 = clock()
+
+    with nogil, parallel(num_threads=num_threads):
+        # Define private buffer variables
+        buff = <float *> malloc(sizeof(float) * n_dimensions)
+
+        for i in prange(start, n_samples, schedule='static'):
+            # Init the gradient vector
+            for ax in range(n_dimensions):
+                pos_f[i * n_dimensions + ax] = 0.0
+            # Compute the positive interaction for the nearest neighbors
+            for k in range(indptr[i], indptr[i+1]):
+                j = neighbors[k]
+                dij = 0.0
+                pij = val_P[k]
+                for ax in range(n_dimensions):
+                    buff[ax] = pos_reference[i, ax] - pos_reference[j, ax]
+                    dij += buff[ax] * buff[ax]
+                qij = float_dof / (float_dof + dij)
+                if dof != 1:  # i.e. exponent != 1
+                    qij = qij ** exponent
+                dij = pij * qij
+
+                # only compute the error when needed
+                if compute_error:
+                    qij = qij / sum_Q
+                    C += pij * log(max(pij, FLOAT32_TINY) / max(qij, FLOAT32_TINY))
+                for ax in range(n_dimensions):
+                    pos_f[i * n_dimensions + ax] += dij * buff[ax]
+
+        free(buff)
+    if verbose > 10:
+        t2 = clock()
+        dt = ((float) (t2 - t1))
+        printf("[t-SNE] Computed error=%1.4f in %1.1e ticks\n", C, dt)
+    return C
+
+
+cdef double compute_gradient_negative(float[:, :] pos_reference,
+                                      float* neg_f,
+                                      _QuadTree qt,
+                                      int dof,
+                                      float theta,
+                                      long start,
+                                      int num_threads) noexcept nogil:
+    cdef:
+        int ax
+        int n_dimensions = qt.n_dimensions
+        int offset = n_dimensions + 2
+        long i, j, idx
+        long n_samples = pos_reference.shape[0]
+        long n = n_samples - start
+        long dta = 0
+        long dtb = 0
+        float size, dist2s, mult
+        float exponent = (dof + 1.0) / 2.0
+        float float_dof = (float) (dof)
+        double qijZ, sum_Q = 0.0
+        float* force
+        float* neg_force
+        float* pos
+        clock_t t1 = 0, t2 = 0, t3 = 0
+        int take_timing = 1 if qt.verbose > 20 else 0
+
+    with nogil, parallel(num_threads=num_threads):
+        # Define thread-local buffers
+        summary = <float*> malloc(sizeof(float) * n * offset)
+        pos = <float *> malloc(sizeof(float) * n_dimensions)
+        force = <float *> malloc(sizeof(float) * n_dimensions)
+        neg_force = <float *> malloc(sizeof(float) * n_dimensions)
+
+        for i in prange(start, n_samples, schedule='static'):
+            # Clear the arrays
+            for ax in range(n_dimensions):
+                force[ax] = 0.0
+                neg_force[ax] = 0.0
+                pos[ax] = pos_reference[i, ax]
+
+            # Find which nodes are summarizing and collect their centers of mass
+            # deltas, and sizes, into vectorized arrays
+            if take_timing:
+                t1 = clock()
+            idx = qt.summarize(pos, summary, theta*theta)
+            if take_timing:
+                t2 = clock()
+            # Compute the t-SNE negative force
+            # for the digits dataset, walking the tree
+            # is about 10-15x more expensive than the
+            # following for loop
+            for j in range(idx // offset):
+
+                dist2s = summary[j * offset + n_dimensions]
+                size = summary[j * offset + n_dimensions + 1]
+                qijZ = float_dof / (float_dof + dist2s)  # 1/(1+dist)
+                if dof != 1:  # i.e. exponent != 1
+                    qijZ = qijZ ** exponent
+
+                sum_Q += size * qijZ   # size of the node * q
+                mult = size * qijZ * qijZ
+                for ax in range(n_dimensions):
+                    neg_force[ax] += mult * summary[j * offset + ax]
+            if take_timing:
+                t3 = clock()
+            for ax in range(n_dimensions):
+                neg_f[i * n_dimensions + ax] = neg_force[ax]
+            if take_timing:
+                dta += t2 - t1
+                dtb += t3 - t2
+        free(pos)
+        free(force)
+        free(neg_force)
+        free(summary)
+    if take_timing:
+        printf("[t-SNE] Tree: %li clock ticks | ", dta)
+        printf("Force computation: %li clock ticks\n", dtb)
+
+    # Put sum_Q to machine EPSILON to avoid divisions by 0
+    sum_Q = max(sum_Q, FLOAT64_EPS)
+    return sum_Q
+
+
+def gradient(float[:] val_P,
+             float[:, :] pos_output,
+             cnp.int64_t[:] neighbors,
+             cnp.int64_t[:] indptr,
+             float[:, :] forces,
+             float theta,
+             int n_dimensions,
+             int verbose,
+             int dof=1,
+             long skip_num_points=0,
+             bint compute_error=1,
+             int num_threads=1):
+    # This function is designed to be called from external Python
+    # it passes the 'forces' array by reference and fills that's array
+    # up in-place
+    cdef float C
+    cdef int n
+    n = pos_output.shape[0]
+    assert val_P.itemsize == 4
+    assert pos_output.itemsize == 4
+    assert forces.itemsize == 4
+    m = "Forces array and pos_output shapes are incompatible"
+    assert n == forces.shape[0], m
+    m = "Pij and pos_output shapes are incompatible"
+    assert n == indptr.shape[0] - 1, m
+    if verbose > 10:
+        printf("[t-SNE] Initializing tree of n_dimensions %i\n", n_dimensions)
+    cdef _QuadTree qt = _QuadTree(pos_output.shape[1], verbose)
+    if verbose > 10:
+        printf("[t-SNE] Inserting %li points\n", pos_output.shape[0])
+    qt.build_tree(pos_output)
+    if verbose > 10:
+        # XXX: format hack to workaround lack of `const char *` type
+        # in the generated C code that triggers error with gcc 4.9
+        # and -Werror=format-security
+        printf("[t-SNE] Computing gradient\n%s", EMPTY_STRING)
+
+    C = compute_gradient(val_P, pos_output, neighbors, indptr, forces,
+                         qt, theta, dof, skip_num_points, compute_error,
+                         num_threads)
+
+    if verbose > 10:
+        # XXX: format hack to workaround lack of `const char *` type
+        # in the generated C code
+        # and -Werror=format-security
+        printf("[t-SNE] Checking tree consistency\n%s", EMPTY_STRING)
+    m = "Tree consistency failed: unexpected number of points on the tree"
+    assert qt.cells[0].cumulative_size == qt.n_points, m
+    if not compute_error:
+        C = np.nan
+    return C
@@ -0,0 +1,438 @@
+"""Isomap for manifold learning"""
+
+# Author: Jake Vanderplas  -- <vanderplas@astro.washington.edu>
+# License: BSD 3 clause (C) 2011
+import warnings
+from numbers import Integral, Real
+
+import numpy as np
+from scipy.sparse import issparse
+from scipy.sparse.csgraph import connected_components, shortest_path
+
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from ..decomposition import KernelPCA
+from ..metrics.pairwise import _VALID_METRICS
+from ..neighbors import NearestNeighbors, kneighbors_graph, radius_neighbors_graph
+from ..preprocessing import KernelCenterer
+from ..utils._param_validation import Interval, StrOptions
+from ..utils.graph import _fix_connected_components
+from ..utils.validation import check_is_fitted
+
+
+class Isomap(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
+    """Isomap Embedding.
+
+    Non-linear dimensionality reduction through Isometric Mapping
+
+    Read more in the :ref:`User Guide <isomap>`.
+
+    Parameters
+    ----------
+    n_neighbors : int or None, default=5
+        Number of neighbors to consider for each point. If `n_neighbors` is an int,
+        then `radius` must be `None`.
+
+    radius : float or None, default=None
+        Limiting distance of neighbors to return. If `radius` is a float,
+        then `n_neighbors` must be set to `None`.
+
+        .. versionadded:: 1.1
+
+    n_components : int, default=2
+        Number of coordinates for the manifold.
+
+    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'
+        'auto' : Attempt to choose the most efficient solver
+        for the given problem.
+
+        'arpack' : Use Arnoldi decomposition to find the eigenvalues
+        and eigenvectors.
+
+        'dense' : Use a direct solver (i.e. LAPACK)
+        for the eigenvalue decomposition.
+
+    tol : float, default=0
+        Convergence tolerance passed to arpack or lobpcg.
+        not used if eigen_solver == 'dense'.
+
+    max_iter : int, default=None
+        Maximum number of iterations for the arpack solver.
+        not used if eigen_solver == 'dense'.
+
+    path_method : {'auto', 'FW', 'D'}, default='auto'
+        Method to use in finding shortest path.
+
+        'auto' : attempt to choose the best algorithm automatically.
+
+        'FW' : Floyd-Warshall algorithm.
+
+        'D' : Dijkstra's algorithm.
+
+    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \
+                          default='auto'
+        Algorithm to use for nearest neighbors search,
+        passed to neighbors.NearestNeighbors instance.
+
+    n_jobs : int or None, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    metric : str, or callable, default="minkowski"
+        The metric to use when calculating distance between instances in a
+        feature array. If metric is a string or callable, it must be one of
+        the options allowed by :func:`sklearn.metrics.pairwise_distances` for
+        its metric parameter.
+        If metric is "precomputed", X is assumed to be a distance matrix and
+        must be square. X may be a :term:`Glossary <sparse graph>`.
+
+        .. versionadded:: 0.22
+
+    p : float, default=2
+        Parameter for the Minkowski metric from
+        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
+        equivalent to using manhattan_distance (l1), and euclidean_distance
+        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
+        .. versionadded:: 0.22
+
+    metric_params : dict, default=None
+        Additional keyword arguments for the metric function.
+
+        .. versionadded:: 0.22
+
+    Attributes
+    ----------
+    embedding_ : array-like, shape (n_samples, n_components)
+        Stores the embedding vectors.
+
+    kernel_pca_ : object
+        :class:`~sklearn.decomposition.KernelPCA` object used to implement the
+        embedding.
+
+    nbrs_ : sklearn.neighbors.NearestNeighbors instance
+        Stores nearest neighbors instance, including BallTree or KDtree
+        if applicable.
+
+    dist_matrix_ : array-like, shape (n_samples, n_samples)
+        Stores the geodesic distance matrix of training data.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    See Also
+    --------
+    sklearn.decomposition.PCA : Principal component analysis that is a linear
+        dimensionality reduction method.
+    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using
+        kernels and PCA.
+    MDS : Manifold learning using multidimensional scaling.
+    TSNE : T-distributed Stochastic Neighbor Embedding.
+    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.
+    SpectralEmbedding : Spectral embedding for non-linear dimensionality.
+
+    References
+    ----------
+
+    .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric
+           framework for nonlinear dimensionality reduction. Science 290 (5500)
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import Isomap
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding = Isomap(n_components=2)
+    >>> X_transformed = embedding.fit_transform(X[:100])
+    >>> X_transformed.shape
+    (100, 2)
+    """
+
+    _parameter_constraints: dict = {
+        "n_neighbors": [Interval(Integral, 1, None, closed="left"), None],
+        "radius": [Interval(Real, 0, None, closed="both"), None],
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "eigen_solver": [StrOptions({"auto", "arpack", "dense"})],
+        "tol": [Interval(Real, 0, None, closed="left")],
+        "max_iter": [Interval(Integral, 1, None, closed="left"), None],
+        "path_method": [StrOptions({"auto", "FW", "D"})],
+        "neighbors_algorithm": [StrOptions({"auto", "brute", "kd_tree", "ball_tree"})],
+        "n_jobs": [Integral, None],
+        "p": [Interval(Real, 1, None, closed="left")],
+        "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
+        "metric_params": [dict, None],
+    }
+
+    def __init__(
+        self,
+        *,
+        n_neighbors=5,
+        radius=None,
+        n_components=2,
+        eigen_solver="auto",
+        tol=0,
+        max_iter=None,
+        path_method="auto",
+        neighbors_algorithm="auto",
+        n_jobs=None,
+        metric="minkowski",
+        p=2,
+        metric_params=None,
+    ):
+        self.n_neighbors = n_neighbors
+        self.radius = radius
+        self.n_components = n_components
+        self.eigen_solver = eigen_solver
+        self.tol = tol
+        self.max_iter = max_iter
+        self.path_method = path_method
+        self.neighbors_algorithm = neighbors_algorithm
+        self.n_jobs = n_jobs
+        self.metric = metric
+        self.p = p
+        self.metric_params = metric_params
+
+    def _fit_transform(self, X):
+        if self.n_neighbors is not None and self.radius is not None:
+            raise ValueError(
+                "Both n_neighbors and radius are provided. Use"
+                f" Isomap(radius={self.radius}, n_neighbors=None) if intended to use"
+                " radius-based neighbors"
+            )
+
+        self.nbrs_ = NearestNeighbors(
+            n_neighbors=self.n_neighbors,
+            radius=self.radius,
+            algorithm=self.neighbors_algorithm,
+            metric=self.metric,
+            p=self.p,
+            metric_params=self.metric_params,
+            n_jobs=self.n_jobs,
+        )
+        self.nbrs_.fit(X)
+        self.n_features_in_ = self.nbrs_.n_features_in_
+        if hasattr(self.nbrs_, "feature_names_in_"):
+            self.feature_names_in_ = self.nbrs_.feature_names_in_
+
+        self.kernel_pca_ = KernelPCA(
+            n_components=self.n_components,
+            kernel="precomputed",
+            eigen_solver=self.eigen_solver,
+            tol=self.tol,
+            max_iter=self.max_iter,
+            n_jobs=self.n_jobs,
+        ).set_output(transform="default")
+
+        if self.n_neighbors is not None:
+            nbg = kneighbors_graph(
+                self.nbrs_,
+                self.n_neighbors,
+                metric=self.metric,
+                p=self.p,
+                metric_params=self.metric_params,
+                mode="distance",
+                n_jobs=self.n_jobs,
+            )
+        else:
+            nbg = radius_neighbors_graph(
+                self.nbrs_,
+                radius=self.radius,
+                metric=self.metric,
+                p=self.p,
+                metric_params=self.metric_params,
+                mode="distance",
+                n_jobs=self.n_jobs,
+            )
+
+        # Compute the number of connected components, and connect the different
+        # components to be able to compute a shortest path between all pairs
+        # of samples in the graph.
+        # Similar fix to cluster._agglomerative._fix_connectivity.
+        n_connected_components, labels = connected_components(nbg)
+        if n_connected_components > 1:
+            if self.metric == "precomputed" and issparse(X):
+                raise RuntimeError(
+                    "The number of connected components of the neighbors graph"
+                    f" is {n_connected_components} > 1. The graph cannot be "
+                    "completed with metric='precomputed', and Isomap cannot be"
+                    "fitted. Increase the number of neighbors to avoid this "
+                    "issue, or precompute the full distance matrix instead "
+                    "of passing a sparse neighbors graph."
+                )
+            warnings.warn(
+                (
+                    "The number of connected components of the neighbors graph "
+                    f"is {n_connected_components} > 1. Completing the graph to fit"
+                    " Isomap might be slow. Increase the number of neighbors to "
+                    "avoid this issue."
+                ),
+                stacklevel=2,
+            )
+
+            # use array validated by NearestNeighbors
+            nbg = _fix_connected_components(
+                X=self.nbrs_._fit_X,
+                graph=nbg,
+                n_connected_components=n_connected_components,
+                component_labels=labels,
+                mode="distance",
+                metric=self.nbrs_.effective_metric_,
+                **self.nbrs_.effective_metric_params_,
+            )
+
+        self.dist_matrix_ = shortest_path(nbg, method=self.path_method, directed=False)
+
+        if self.nbrs_._fit_X.dtype == np.float32:
+            self.dist_matrix_ = self.dist_matrix_.astype(
+                self.nbrs_._fit_X.dtype, copy=False
+            )
+
+        G = self.dist_matrix_**2
+        G *= -0.5
+
+        self.embedding_ = self.kernel_pca_.fit_transform(G)
+        self._n_features_out = self.embedding_.shape[1]
+
+    def reconstruction_error(self):
+        """Compute the reconstruction error for the embedding.
+
+        Returns
+        -------
+        reconstruction_error : float
+            Reconstruction error.
+
+        Notes
+        -----
+        The cost function of an isomap embedding is
+
+        ``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``
+
+        Where D is the matrix of distances for the input data X,
+        D_fit is the matrix of distances for the output embedding X_fit,
+        and K is the isomap kernel:
+
+        ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``
+        """
+        G = -0.5 * self.dist_matrix_**2
+        G_center = KernelCenterer().fit_transform(G)
+        evals = self.kernel_pca_.eigenvalues_
+        return np.sqrt(np.sum(G_center**2) - np.sum(evals**2)) / G.shape[0]
+
+    @_fit_context(
+        # Isomap.metric is not validated yet
+        prefer_skip_nested_validation=False
+    )
+    def fit(self, X, y=None):
+        """Compute the embedding vectors for data X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}
+            Sample data, shape = (n_samples, n_features), in the form of a
+            numpy array, sparse matrix, precomputed tree, or NearestNeighbors
+            object.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        self : object
+            Returns a fitted instance of self.
+        """
+        self._fit_transform(X)
+        return self
+
+    @_fit_context(
+        # Isomap.metric is not validated yet
+        prefer_skip_nested_validation=False
+    )
+    def fit_transform(self, X, y=None):
+        """Fit the model from data in X and transform X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix, BallTree, KDTree}
+            Training vector, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        X_new : array-like, shape (n_samples, n_components)
+            X transformed in the new space.
+        """
+        self._fit_transform(X)
+        return self.embedding_
+
+    def transform(self, X):
+        """Transform X.
+
+        This is implemented by linking the points X into the graph of geodesic
+        distances of the training data. First the `n_neighbors` nearest
+        neighbors of X are found in the training data, and from these the
+        shortest geodesic distances from each point in X to each point in
+        the training data are computed in order to construct the kernel.
+        The embedding of X is the projection of this kernel onto the
+        embedding vectors of the training set.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape (n_queries, n_features)
+            If neighbors_algorithm='precomputed', X is assumed to be a
+            distance matrix or a sparse graph of shape
+            (n_queries, n_samples_fit).
+
+        Returns
+        -------
+        X_new : array-like, shape (n_queries, n_components)
+            X transformed in the new space.
+        """
+        check_is_fitted(self)
+        if self.n_neighbors is not None:
+            distances, indices = self.nbrs_.kneighbors(X, return_distance=True)
+        else:
+            distances, indices = self.nbrs_.radius_neighbors(X, return_distance=True)
+
+        # Create the graph of shortest distances from X to
+        # training data via the nearest neighbors of X.
+        # This can be done as a single array operation, but it potentially
+        # takes a lot of memory.  To avoid that, use a loop:
+
+        n_samples_fit = self.nbrs_.n_samples_fit_
+        n_queries = distances.shape[0]
+
+        if hasattr(X, "dtype") and X.dtype == np.float32:
+            dtype = np.float32
+        else:
+            dtype = np.float64
+
+        G_X = np.zeros((n_queries, n_samples_fit), dtype)
+        for i in range(n_queries):
+            G_X[i] = np.min(self.dist_matrix_[indices[i]] + distances[i][:, None], 0)
+
+        G_X **= 2
+        G_X *= -0.5
+
+        return self.kernel_pca_.transform(G_X)
+
+    def _more_tags(self):
+        return {"preserves_dtype": [np.float64, np.float32]}
@@ -0,0 +1,880 @@
+"""Locally Linear Embedding"""
+
+# Author: Fabian Pedregosa -- <fabian.pedregosa@inria.fr>
+#         Jake Vanderplas  -- <vanderplas@astro.washington.edu>
+# License: BSD 3 clause (C) INRIA 2011
+
+from numbers import Integral, Real
+
+import numpy as np
+from scipy.linalg import eigh, qr, solve, svd
+from scipy.sparse import csr_matrix, eye
+from scipy.sparse.linalg import eigsh
+
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+    _UnstableArchMixin,
+)
+from ..neighbors import NearestNeighbors
+from ..utils import check_array, check_random_state
+from ..utils._arpack import _init_arpack_v0
+from ..utils._param_validation import Interval, StrOptions, validate_params
+from ..utils.extmath import stable_cumsum
+from ..utils.validation import FLOAT_DTYPES, check_is_fitted
+
+
+def barycenter_weights(X, Y, indices, reg=1e-3):
+    """Compute barycenter weights of X from Y along the first axis
+
+    We estimate the weights to assign to each point in Y[indices] to recover
+    the point X[i]. The barycenter weights sum to 1.
+
+    Parameters
+    ----------
+    X : array-like, shape (n_samples, n_dim)
+
+    Y : array-like, shape (n_samples, n_dim)
+
+    indices : array-like, shape (n_samples, n_dim)
+            Indices of the points in Y used to compute the barycenter
+
+    reg : float, default=1e-3
+        Amount of regularization to add for the problem to be
+        well-posed in the case of n_neighbors > n_dim
+
+    Returns
+    -------
+    B : array-like, shape (n_samples, n_neighbors)
+
+    Notes
+    -----
+    See developers note for more information.
+    """
+    X = check_array(X, dtype=FLOAT_DTYPES)
+    Y = check_array(Y, dtype=FLOAT_DTYPES)
+    indices = check_array(indices, dtype=int)
+
+    n_samples, n_neighbors = indices.shape
+    assert X.shape[0] == n_samples
+
+    B = np.empty((n_samples, n_neighbors), dtype=X.dtype)
+    v = np.ones(n_neighbors, dtype=X.dtype)
+
+    # this might raise a LinalgError if G is singular and has trace
+    # zero
+    for i, ind in enumerate(indices):
+        A = Y[ind]
+        C = A - X[i]  # broadcasting
+        G = np.dot(C, C.T)
+        trace = np.trace(G)
+        if trace > 0:
+            R = reg * trace
+        else:
+            R = reg
+        G.flat[:: n_neighbors + 1] += R
+        w = solve(G, v, assume_a="pos")
+        B[i, :] = w / np.sum(w)
+    return B
+
+
+def barycenter_kneighbors_graph(X, n_neighbors, reg=1e-3, n_jobs=None):
+    """Computes the barycenter weighted graph of k-Neighbors for points in X
+
+    Parameters
+    ----------
+    X : {array-like, NearestNeighbors}
+        Sample data, shape = (n_samples, n_features), in the form of a
+        numpy array or a NearestNeighbors object.
+
+    n_neighbors : int
+        Number of neighbors for each sample.
+
+    reg : float, default=1e-3
+        Amount of regularization when solving the least-squares
+        problem. Only relevant if mode='barycenter'. If None, use the
+        default.
+
+    n_jobs : int or None, default=None
+        The number of parallel jobs to run for neighbors search.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Returns
+    -------
+    A : sparse matrix in CSR format, shape = [n_samples, n_samples]
+        A[i, j] is assigned the weight of edge that connects i to j.
+
+    See Also
+    --------
+    sklearn.neighbors.kneighbors_graph
+    sklearn.neighbors.radius_neighbors_graph
+    """
+    knn = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs).fit(X)
+    X = knn._fit_X
+    n_samples = knn.n_samples_fit_
+    ind = knn.kneighbors(X, return_distance=False)[:, 1:]
+    data = barycenter_weights(X, X, ind, reg=reg)
+    indptr = np.arange(0, n_samples * n_neighbors + 1, n_neighbors)
+    return csr_matrix((data.ravel(), ind.ravel(), indptr), shape=(n_samples, n_samples))
+
+
+def null_space(
+    M, k, k_skip=1, eigen_solver="arpack", tol=1e-6, max_iter=100, random_state=None
+):
+    """
+    Find the null space of a matrix M.
+
+    Parameters
+    ----------
+    M : {array, matrix, sparse matrix, LinearOperator}
+        Input covariance matrix: should be symmetric positive semi-definite
+
+    k : int
+        Number of eigenvalues/vectors to return
+
+    k_skip : int, default=1
+        Number of low eigenvalues to skip.
+
+    eigen_solver : {'auto', 'arpack', 'dense'}, default='arpack'
+        auto : algorithm will attempt to choose the best method for input data
+        arpack : use arnoldi iteration in shift-invert mode.
+                    For this method, M may be a dense matrix, sparse matrix,
+                    or general linear operator.
+                    Warning: ARPACK can be unstable for some problems.  It is
+                    best to try several random seeds in order to check results.
+        dense  : use standard dense matrix operations for the eigenvalue
+                    decomposition.  For this method, M must be an array
+                    or matrix type.  This method should be avoided for
+                    large problems.
+
+    tol : float, default=1e-6
+        Tolerance for 'arpack' method.
+        Not used if eigen_solver=='dense'.
+
+    max_iter : int, default=100
+        Maximum number of iterations for 'arpack' method.
+        Not used if eigen_solver=='dense'
+
+    random_state : int, RandomState instance, default=None
+        Determines the random number generator when ``solver`` == 'arpack'.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+    """
+    if eigen_solver == "auto":
+        if M.shape[0] > 200 and k + k_skip < 10:
+            eigen_solver = "arpack"
+        else:
+            eigen_solver = "dense"
+
+    if eigen_solver == "arpack":
+        v0 = _init_arpack_v0(M.shape[0], random_state)
+        try:
+            eigen_values, eigen_vectors = eigsh(
+                M, k + k_skip, sigma=0.0, tol=tol, maxiter=max_iter, v0=v0
+            )
+        except RuntimeError as e:
+            raise ValueError(
+                "Error in determining null-space with ARPACK. Error message: "
+                "'%s'. Note that eigen_solver='arpack' can fail when the "
+                "weight matrix is singular or otherwise ill-behaved. In that "
+                "case, eigen_solver='dense' is recommended. See online "
+                "documentation for more information." % e
+            ) from e
+
+        return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])
+    elif eigen_solver == "dense":
+        if hasattr(M, "toarray"):
+            M = M.toarray()
+        eigen_values, eigen_vectors = eigh(
+            M, subset_by_index=(k_skip, k + k_skip - 1), overwrite_a=True
+        )
+        index = np.argsort(np.abs(eigen_values))
+        return eigen_vectors[:, index], np.sum(eigen_values)
+    else:
+        raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)
+
+
+def _locally_linear_embedding(
+    X,
+    *,
+    n_neighbors,
+    n_components,
+    reg=1e-3,
+    eigen_solver="auto",
+    tol=1e-6,
+    max_iter=100,
+    method="standard",
+    hessian_tol=1e-4,
+    modified_tol=1e-12,
+    random_state=None,
+    n_jobs=None,
+):
+    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
+    nbrs.fit(X)
+    X = nbrs._fit_X
+
+    N, d_in = X.shape
+
+    if n_components > d_in:
+        raise ValueError(
+            "output dimension must be less than or equal to input dimension"
+        )
+    if n_neighbors >= N:
+        raise ValueError(
+            "Expected n_neighbors <= n_samples,  but n_samples = %d, n_neighbors = %d"
+            % (N, n_neighbors)
+        )
+
+    M_sparse = eigen_solver != "dense"
+
+    if method == "standard":
+        W = barycenter_kneighbors_graph(
+            nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs
+        )
+
+        # we'll compute M = (I-W)'(I-W)
+        # depending on the solver, we'll do this differently
+        if M_sparse:
+            M = eye(*W.shape, format=W.format) - W
+            M = (M.T * M).tocsr()
+        else:
+            M = (W.T * W - W.T - W).toarray()
+            M.flat[:: M.shape[0] + 1] += 1  # W = W - I = W - I
+
+    elif method == "hessian":
+        dp = n_components * (n_components + 1) // 2
+
+        if n_neighbors <= n_components + dp:
+            raise ValueError(
+                "for method='hessian', n_neighbors must be "
+                "greater than "
+                "[n_components * (n_components + 3) / 2]"
+            )
+
+        neighbors = nbrs.kneighbors(
+            X, n_neighbors=n_neighbors + 1, return_distance=False
+        )
+        neighbors = neighbors[:, 1:]
+
+        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
+        Yi[:, 0] = 1
+
+        M = np.zeros((N, N), dtype=np.float64)
+
+        use_svd = n_neighbors > d_in
+
+        for i in range(N):
+            Gi = X[neighbors[i]]
+            Gi -= Gi.mean(0)
+
+            # build Hessian estimator
+            if use_svd:
+                U = svd(Gi, full_matrices=0)[0]
+            else:
+                Ci = np.dot(Gi, Gi.T)
+                U = eigh(Ci)[1][:, ::-1]
+
+            Yi[:, 1 : 1 + n_components] = U[:, :n_components]
+
+            j = 1 + n_components
+            for k in range(n_components):
+                Yi[:, j : j + n_components - k] = U[:, k : k + 1] * U[:, k:n_components]
+                j += n_components - k
+
+            Q, R = qr(Yi)
+
+            w = Q[:, n_components + 1 :]
+            S = w.sum(0)
+
+            S[np.where(abs(S) < hessian_tol)] = 1
+            w /= S
+
+            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
+            M[nbrs_x, nbrs_y] += np.dot(w, w.T)
+
+        if M_sparse:
+            M = csr_matrix(M)
+
+    elif method == "modified":
+        if n_neighbors < n_components:
+            raise ValueError("modified LLE requires n_neighbors >= n_components")
+
+        neighbors = nbrs.kneighbors(
+            X, n_neighbors=n_neighbors + 1, return_distance=False
+        )
+        neighbors = neighbors[:, 1:]
+
+        # find the eigenvectors and eigenvalues of each local covariance
+        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
+        # where the columns are eigenvectors
+        V = np.zeros((N, n_neighbors, n_neighbors))
+        nev = min(d_in, n_neighbors)
+        evals = np.zeros([N, nev])
+
+        # choose the most efficient way to find the eigenvectors
+        use_svd = n_neighbors > d_in
+
+        if use_svd:
+            for i in range(N):
+                X_nbrs = X[neighbors[i]] - X[i]
+                V[i], evals[i], _ = svd(X_nbrs, full_matrices=True)
+            evals **= 2
+        else:
+            for i in range(N):
+                X_nbrs = X[neighbors[i]] - X[i]
+                C_nbrs = np.dot(X_nbrs, X_nbrs.T)
+                evi, vi = eigh(C_nbrs)
+                evals[i] = evi[::-1]
+                V[i] = vi[:, ::-1]
+
+        # find regularized weights: this is like normal LLE.
+        # because we've already computed the SVD of each covariance matrix,
+        # it's faster to use this rather than np.linalg.solve
+        reg = 1e-3 * evals.sum(1)
+
+        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
+        tmp[:, :nev] /= evals + reg[:, None]
+        tmp[:, nev:] /= reg[:, None]
+
+        w_reg = np.zeros((N, n_neighbors))
+        for i in range(N):
+            w_reg[i] = np.dot(V[i], tmp[i])
+        w_reg /= w_reg.sum(1)[:, None]
+
+        # calculate eta: the median of the ratio of small to large eigenvalues
+        # across the points.  This is used to determine s_i, below
+        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
+        eta = np.median(rho)
+
+        # find s_i, the size of the "almost null space" for each point:
+        # this is the size of the largest set of eigenvalues
+        # such that Sum[v; v in set]/Sum[v; v not in set] < eta
+        s_range = np.zeros(N, dtype=int)
+        evals_cumsum = stable_cumsum(evals, 1)
+        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
+        for i in range(N):
+            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
+        s_range += n_neighbors - nev  # number of zero eigenvalues
+
+        # Now calculate M.
+        # This is the [N x N] matrix whose null space is the desired embedding
+        M = np.zeros((N, N), dtype=np.float64)
+        for i in range(N):
+            s_i = s_range[i]
+
+            # select bottom s_i eigenvectors and calculate alpha
+            Vi = V[i, :, n_neighbors - s_i :]
+            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
+
+            # compute Householder matrix which satisfies
+            #  Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
+            # using prescription from paper
+            h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))
+
+            norm_h = np.linalg.norm(h)
+            if norm_h < modified_tol:
+                h *= 0
+            else:
+                h /= norm_h
+
+            # Householder matrix is
+            #  >> Hi = np.identity(s_i) - 2*np.outer(h,h)
+            # Then the weight matrix is
+            #  >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
+            # We do this much more efficiently:
+            Wi = Vi - 2 * np.outer(np.dot(Vi, h), h) + (1 - alpha_i) * w_reg[i, :, None]
+
+            # Update M as follows:
+            # >> W_hat = np.zeros( (N,s_i) )
+            # >> W_hat[neighbors[i],:] = Wi
+            # >> W_hat[i] -= 1
+            # >> M += np.dot(W_hat,W_hat.T)
+            # We can do this much more efficiently:
+            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
+            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
+            Wi_sum1 = Wi.sum(1)
+            M[i, neighbors[i]] -= Wi_sum1
+            M[neighbors[i], i] -= Wi_sum1
+            M[i, i] += s_i
+
+        if M_sparse:
+            M = csr_matrix(M)
+
+    elif method == "ltsa":
+        neighbors = nbrs.kneighbors(
+            X, n_neighbors=n_neighbors + 1, return_distance=False
+        )
+        neighbors = neighbors[:, 1:]
+
+        M = np.zeros((N, N))
+
+        use_svd = n_neighbors > d_in
+
+        for i in range(N):
+            Xi = X[neighbors[i]]
+            Xi -= Xi.mean(0)
+
+            # compute n_components largest eigenvalues of Xi * Xi^T
+            if use_svd:
+                v = svd(Xi, full_matrices=True)[0]
+            else:
+                Ci = np.dot(Xi, Xi.T)
+                v = eigh(Ci)[1][:, ::-1]
+
+            Gi = np.zeros((n_neighbors, n_components + 1))
+            Gi[:, 1:] = v[:, :n_components]
+            Gi[:, 0] = 1.0 / np.sqrt(n_neighbors)
+
+            GiGiT = np.dot(Gi, Gi.T)
+
+            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
+            M[nbrs_x, nbrs_y] -= GiGiT
+            M[neighbors[i], neighbors[i]] += 1
+
+    return null_space(
+        M,
+        n_components,
+        k_skip=1,
+        eigen_solver=eigen_solver,
+        tol=tol,
+        max_iter=max_iter,
+        random_state=random_state,
+    )
+
+
+@validate_params(
+    {
+        "X": ["array-like", NearestNeighbors],
+        "n_neighbors": [Interval(Integral, 1, None, closed="left")],
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "reg": [Interval(Real, 0, None, closed="left")],
+        "eigen_solver": [StrOptions({"auto", "arpack", "dense"})],
+        "tol": [Interval(Real, 0, None, closed="left")],
+        "max_iter": [Interval(Integral, 1, None, closed="left")],
+        "method": [StrOptions({"standard", "hessian", "modified", "ltsa"})],
+        "hessian_tol": [Interval(Real, 0, None, closed="left")],
+        "modified_tol": [Interval(Real, 0, None, closed="left")],
+        "random_state": ["random_state"],
+        "n_jobs": [None, Integral],
+    },
+    prefer_skip_nested_validation=True,
+)
+def locally_linear_embedding(
+    X,
+    *,
+    n_neighbors,
+    n_components,
+    reg=1e-3,
+    eigen_solver="auto",
+    tol=1e-6,
+    max_iter=100,
+    method="standard",
+    hessian_tol=1e-4,
+    modified_tol=1e-12,
+    random_state=None,
+    n_jobs=None,
+):
+    """Perform a Locally Linear Embedding analysis on the data.
+
+    Read more in the :ref:`User Guide <locally_linear_embedding>`.
+
+    Parameters
+    ----------
+    X : {array-like, NearestNeighbors}
+        Sample data, shape = (n_samples, n_features), in the form of a
+        numpy array or a NearestNeighbors object.
+
+    n_neighbors : int
+        Number of neighbors to consider for each point.
+
+    n_components : int
+        Number of coordinates for the manifold.
+
+    reg : float, default=1e-3
+        Regularization constant, multiplies the trace of the local covariance
+        matrix of the distances.
+
+    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'
+        auto : algorithm will attempt to choose the best method for input data
+
+        arpack : use arnoldi iteration in shift-invert mode.
+                    For this method, M may be a dense matrix, sparse matrix,
+                    or general linear operator.
+                    Warning: ARPACK can be unstable for some problems.  It is
+                    best to try several random seeds in order to check results.
+
+        dense  : use standard dense matrix operations for the eigenvalue
+                    decomposition.  For this method, M must be an array
+                    or matrix type.  This method should be avoided for
+                    large problems.
+
+    tol : float, default=1e-6
+        Tolerance for 'arpack' method
+        Not used if eigen_solver=='dense'.
+
+    max_iter : int, default=100
+        Maximum number of iterations for the arpack solver.
+
+    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'
+        standard : use the standard locally linear embedding algorithm.
+                   see reference [1]_
+        hessian  : use the Hessian eigenmap method.  This method requires
+                   n_neighbors > n_components * (1 + (n_components + 1) / 2.
+                   see reference [2]_
+        modified : use the modified locally linear embedding algorithm.
+                   see reference [3]_
+        ltsa     : use local tangent space alignment algorithm
+                   see reference [4]_
+
+    hessian_tol : float, default=1e-4
+        Tolerance for Hessian eigenmapping method.
+        Only used if method == 'hessian'.
+
+    modified_tol : float, default=1e-12
+        Tolerance for modified LLE method.
+        Only used if method == 'modified'.
+
+    random_state : int, RandomState instance, default=None
+        Determines the random number generator when ``solver`` == 'arpack'.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    n_jobs : int or None, default=None
+        The number of parallel jobs to run for neighbors search.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Returns
+    -------
+    Y : ndarray of shape (n_samples, n_components)
+        Embedding vectors.
+
+    squared_error : float
+        Reconstruction error for the embedding vectors. Equivalent to
+        ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.
+
+    References
+    ----------
+
+    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
+        by locally linear embedding.  Science 290:2323 (2000).
+    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
+        linear embedding techniques for high-dimensional data.
+        Proc Natl Acad Sci U S A.  100:5591 (2003).
+    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
+        Embedding Using Multiple Weights.
+        <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_
+    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
+        dimensionality reduction via tangent space alignment.
+        Journal of Shanghai Univ.  8:406 (2004)
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import locally_linear_embedding
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding, _ = locally_linear_embedding(X[:100],n_neighbors=5, n_components=2)
+    >>> embedding.shape
+    (100, 2)
+    """
+    return _locally_linear_embedding(
+        X=X,
+        n_neighbors=n_neighbors,
+        n_components=n_components,
+        reg=reg,
+        eigen_solver=eigen_solver,
+        tol=tol,
+        max_iter=max_iter,
+        method=method,
+        hessian_tol=hessian_tol,
+        modified_tol=modified_tol,
+        random_state=random_state,
+        n_jobs=n_jobs,
+    )
+
+
+class LocallyLinearEmbedding(
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _UnstableArchMixin,
+    BaseEstimator,
+):
+    """Locally Linear Embedding.
+
+    Read more in the :ref:`User Guide <locally_linear_embedding>`.
+
+    Parameters
+    ----------
+    n_neighbors : int, default=5
+        Number of neighbors to consider for each point.
+
+    n_components : int, default=2
+        Number of coordinates for the manifold.
+
+    reg : float, default=1e-3
+        Regularization constant, multiplies the trace of the local covariance
+        matrix of the distances.
+
+    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'
+        The solver used to compute the eigenvectors. The available options are:
+
+        - `'auto'` : algorithm will attempt to choose the best method for input
+          data.
+        - `'arpack'` : use arnoldi iteration in shift-invert mode. For this
+          method, M may be a dense matrix, sparse matrix, or general linear
+          operator.
+        - `'dense'`  : use standard dense matrix operations for the eigenvalue
+          decomposition. For this method, M must be an array or matrix type.
+          This method should be avoided for large problems.
+
+        .. warning::
+           ARPACK can be unstable for some problems.  It is best to try several
+           random seeds in order to check results.
+
+    tol : float, default=1e-6
+        Tolerance for 'arpack' method
+        Not used if eigen_solver=='dense'.
+
+    max_iter : int, default=100
+        Maximum number of iterations for the arpack solver.
+        Not used if eigen_solver=='dense'.
+
+    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'
+        - `standard`: use the standard locally linear embedding algorithm. see
+          reference [1]_
+        - `hessian`: use the Hessian eigenmap method. This method requires
+          ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see
+          reference [2]_
+        - `modified`: use the modified locally linear embedding algorithm.
+          see reference [3]_
+        - `ltsa`: use local tangent space alignment algorithm. see
+          reference [4]_
+
+    hessian_tol : float, default=1e-4
+        Tolerance for Hessian eigenmapping method.
+        Only used if ``method == 'hessian'``.
+
+    modified_tol : float, default=1e-12
+        Tolerance for modified LLE method.
+        Only used if ``method == 'modified'``.
+
+    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \
+                          default='auto'
+        Algorithm to use for nearest neighbors search, passed to
+        :class:`~sklearn.neighbors.NearestNeighbors` instance.
+
+    random_state : int, RandomState instance, default=None
+        Determines the random number generator when
+        ``eigen_solver`` == 'arpack'. Pass an int for reproducible results
+        across multiple function calls. See :term:`Glossary <random_state>`.
+
+    n_jobs : int or None, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Attributes
+    ----------
+    embedding_ : array-like, shape [n_samples, n_components]
+        Stores the embedding vectors
+
+    reconstruction_error_ : float
+        Reconstruction error associated with `embedding_`
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    nbrs_ : NearestNeighbors object
+        Stores nearest neighbors instance, including BallTree or KDtree
+        if applicable.
+
+    See Also
+    --------
+    SpectralEmbedding : Spectral embedding for non-linear dimensionality
+        reduction.
+    TSNE : Distributed Stochastic Neighbor Embedding.
+
+    References
+    ----------
+
+    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
+        by locally linear embedding.  Science 290:2323 (2000).
+    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
+        linear embedding techniques for high-dimensional data.
+        Proc Natl Acad Sci U S A.  100:5591 (2003).
+    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
+        Embedding Using Multiple Weights.
+        <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_
+    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
+        dimensionality reduction via tangent space alignment.
+        Journal of Shanghai Univ.  8:406 (2004)
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import LocallyLinearEmbedding
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding = LocallyLinearEmbedding(n_components=2)
+    >>> X_transformed = embedding.fit_transform(X[:100])
+    >>> X_transformed.shape
+    (100, 2)
+    """
+
+    _parameter_constraints: dict = {
+        "n_neighbors": [Interval(Integral, 1, None, closed="left")],
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "reg": [Interval(Real, 0, None, closed="left")],
+        "eigen_solver": [StrOptions({"auto", "arpack", "dense"})],
+        "tol": [Interval(Real, 0, None, closed="left")],
+        "max_iter": [Interval(Integral, 1, None, closed="left")],
+        "method": [StrOptions({"standard", "hessian", "modified", "ltsa"})],
+        "hessian_tol": [Interval(Real, 0, None, closed="left")],
+        "modified_tol": [Interval(Real, 0, None, closed="left")],
+        "neighbors_algorithm": [StrOptions({"auto", "brute", "kd_tree", "ball_tree"})],
+        "random_state": ["random_state"],
+        "n_jobs": [None, Integral],
+    }
+
+    def __init__(
+        self,
+        *,
+        n_neighbors=5,
+        n_components=2,
+        reg=1e-3,
+        eigen_solver="auto",
+        tol=1e-6,
+        max_iter=100,
+        method="standard",
+        hessian_tol=1e-4,
+        modified_tol=1e-12,
+        neighbors_algorithm="auto",
+        random_state=None,
+        n_jobs=None,
+    ):
+        self.n_neighbors = n_neighbors
+        self.n_components = n_components
+        self.reg = reg
+        self.eigen_solver = eigen_solver
+        self.tol = tol
+        self.max_iter = max_iter
+        self.method = method
+        self.hessian_tol = hessian_tol
+        self.modified_tol = modified_tol
+        self.random_state = random_state
+        self.neighbors_algorithm = neighbors_algorithm
+        self.n_jobs = n_jobs
+
+    def _fit_transform(self, X):
+        self.nbrs_ = NearestNeighbors(
+            n_neighbors=self.n_neighbors,
+            algorithm=self.neighbors_algorithm,
+            n_jobs=self.n_jobs,
+        )
+
+        random_state = check_random_state(self.random_state)
+        X = self._validate_data(X, dtype=float)
+        self.nbrs_.fit(X)
+        self.embedding_, self.reconstruction_error_ = _locally_linear_embedding(
+            X=self.nbrs_,
+            n_neighbors=self.n_neighbors,
+            n_components=self.n_components,
+            eigen_solver=self.eigen_solver,
+            tol=self.tol,
+            max_iter=self.max_iter,
+            method=self.method,
+            hessian_tol=self.hessian_tol,
+            modified_tol=self.modified_tol,
+            random_state=random_state,
+            reg=self.reg,
+            n_jobs=self.n_jobs,
+        )
+        self._n_features_out = self.embedding_.shape[1]
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit(self, X, y=None):
+        """Compute the embedding vectors for data X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training set.
+
+        y : Ignored
+            Not used, present here for API consistency by convention.
+
+        Returns
+        -------
+        self : object
+            Fitted `LocallyLinearEmbedding` class instance.
+        """
+        self._fit_transform(X)
+        return self
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit_transform(self, X, y=None):
+        """Compute the embedding vectors for data X and transform X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training set.
+
+        y : Ignored
+            Not used, present here for API consistency by convention.
+
+        Returns
+        -------
+        X_new : array-like, shape (n_samples, n_components)
+            Returns the instance itself.
+        """
+        self._fit_transform(X)
+        return self.embedding_
+
+    def transform(self, X):
+        """
+        Transform new points into embedding space.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training set.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components)
+            Returns the instance itself.
+
+        Notes
+        -----
+        Because of scaling performed by this method, it is discouraged to use
+        it together with methods that are not scale-invariant (like SVMs).
+        """
+        check_is_fitted(self)
+
+        X = self._validate_data(X, reset=False)
+        ind = self.nbrs_.kneighbors(
+            X, n_neighbors=self.n_neighbors, return_distance=False
+        )
+        weights = barycenter_weights(X, self.nbrs_._fit_X, ind, reg=self.reg)
+        X_new = np.empty((X.shape[0], self.n_components))
+        for i in range(X.shape[0]):
+            X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])
+        return X_new
@@ -0,0 +1,653 @@
+"""
+Multi-dimensional Scaling (MDS).
+"""
+
+# author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
+# License: BSD
+
+import warnings
+from numbers import Integral, Real
+
+import numpy as np
+from joblib import effective_n_jobs
+
+from ..base import BaseEstimator, _fit_context
+from ..isotonic import IsotonicRegression
+from ..metrics import euclidean_distances
+from ..utils import check_array, check_random_state, check_symmetric
+from ..utils._param_validation import Interval, StrOptions, validate_params
+from ..utils.parallel import Parallel, delayed
+
+
+def _smacof_single(
+    dissimilarities,
+    metric=True,
+    n_components=2,
+    init=None,
+    max_iter=300,
+    verbose=0,
+    eps=1e-3,
+    random_state=None,
+    normalized_stress=False,
+):
+    """Computes multidimensional scaling using SMACOF algorithm.
+
+    Parameters
+    ----------
+    dissimilarities : ndarray of shape (n_samples, n_samples)
+        Pairwise dissimilarities between the points. Must be symmetric.
+
+    metric : bool, default=True
+        Compute metric or nonmetric SMACOF algorithm.
+        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as
+        missing values.
+
+    n_components : int, default=2
+        Number of dimensions in which to immerse the dissimilarities. If an
+        ``init`` array is provided, this option is overridden and the shape of
+        ``init`` is used to determine the dimensionality of the embedding
+        space.
+
+    init : ndarray of shape (n_samples, n_components), default=None
+        Starting configuration of the embedding to initialize the algorithm. By
+        default, the algorithm is initialized with a randomly chosen array.
+
+    max_iter : int, default=300
+        Maximum number of iterations of the SMACOF algorithm for a single run.
+
+    verbose : int, default=0
+        Level of verbosity.
+
+    eps : float, default=1e-3
+        Relative tolerance with respect to stress at which to declare
+        convergence. The value of `eps` should be tuned separately depending
+        on whether or not `normalized_stress` is being used.
+
+    random_state : int, RandomState instance or None, default=None
+        Determines the random number generator used to initialize the centers.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    normalized_stress : bool, default=False
+        Whether use and return normed stress value (Stress-1) instead of raw
+        stress calculated by default. Only supported in non-metric MDS. The
+        caller must ensure that if `normalized_stress=True` then `metric=False`
+
+        .. versionadded:: 1.2
+
+    Returns
+    -------
+    X : ndarray of shape (n_samples, n_components)
+        Coordinates of the points in a ``n_components``-space.
+
+    stress : float
+        The final value of the stress (sum of squared distance of the
+        disparities and the distances for all constrained points).
+        If `normalized_stress=True`, and `metric=False` returns Stress-1.
+        A value of 0 indicates "perfect" fit, 0.025 excellent, 0.05 good,
+        0.1 fair, and 0.2 poor [1]_.
+
+    n_iter : int
+        The number of iterations corresponding to the best stress.
+
+    References
+    ----------
+    .. [1] "Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
+           Psychometrika, 29 (1964)
+
+    .. [2] "Multidimensional scaling by optimizing goodness of fit to a nonmetric
+           hypothesis" Kruskal, J. Psychometrika, 29, (1964)
+
+    .. [3] "Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
+           Groenen P. Springer Series in Statistics (1997)
+    """
+    dissimilarities = check_symmetric(dissimilarities, raise_exception=True)
+
+    n_samples = dissimilarities.shape[0]
+    random_state = check_random_state(random_state)
+
+    sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()
+    sim_flat_w = sim_flat[sim_flat != 0]
+    if init is None:
+        # Randomly choose initial configuration
+        X = random_state.uniform(size=n_samples * n_components)
+        X = X.reshape((n_samples, n_components))
+    else:
+        # overrides the parameter p
+        n_components = init.shape[1]
+        if n_samples != init.shape[0]:
+            raise ValueError(
+                "init matrix should be of shape (%d, %d)" % (n_samples, n_components)
+            )
+        X = init
+
+    old_stress = None
+    ir = IsotonicRegression()
+    for it in range(max_iter):
+        # Compute distance and monotonic regression
+        dis = euclidean_distances(X)
+
+        if metric:
+            disparities = dissimilarities
+        else:
+            dis_flat = dis.ravel()
+            # dissimilarities with 0 are considered as missing values
+            dis_flat_w = dis_flat[sim_flat != 0]
+
+            # Compute the disparities using a monotonic regression
+            disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)
+            disparities = dis_flat.copy()
+            disparities[sim_flat != 0] = disparities_flat
+            disparities = disparities.reshape((n_samples, n_samples))
+            disparities *= np.sqrt(
+                (n_samples * (n_samples - 1) / 2) / (disparities**2).sum()
+            )
+
+        # Compute stress
+        stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2
+        if normalized_stress:
+            stress = np.sqrt(stress / ((disparities.ravel() ** 2).sum() / 2))
+        # Update X using the Guttman transform
+        dis[dis == 0] = 1e-5
+        ratio = disparities / dis
+        B = -ratio
+        B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
+        X = 1.0 / n_samples * np.dot(B, X)
+
+        dis = np.sqrt((X**2).sum(axis=1)).sum()
+        if verbose >= 2:
+            print("it: %d, stress %s" % (it, stress))
+        if old_stress is not None:
+            if (old_stress - stress / dis) < eps:
+                if verbose:
+                    print("breaking at iteration %d with stress %s" % (it, stress))
+                break
+        old_stress = stress / dis
+
+    return X, stress, it + 1
+
+
+@validate_params(
+    {
+        "dissimilarities": ["array-like"],
+        "metric": ["boolean"],
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "init": ["array-like", None],
+        "n_init": [Interval(Integral, 1, None, closed="left")],
+        "n_jobs": [Integral, None],
+        "max_iter": [Interval(Integral, 1, None, closed="left")],
+        "verbose": ["verbose"],
+        "eps": [Interval(Real, 0, None, closed="left")],
+        "random_state": ["random_state"],
+        "return_n_iter": ["boolean"],
+        "normalized_stress": ["boolean", StrOptions({"auto"})],
+    },
+    prefer_skip_nested_validation=True,
+)
+def smacof(
+    dissimilarities,
+    *,
+    metric=True,
+    n_components=2,
+    init=None,
+    n_init=8,
+    n_jobs=None,
+    max_iter=300,
+    verbose=0,
+    eps=1e-3,
+    random_state=None,
+    return_n_iter=False,
+    normalized_stress="auto",
+):
+    """Compute multidimensional scaling using the SMACOF algorithm.
+
+    The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a
+    multidimensional scaling algorithm which minimizes an objective function
+    (the *stress*) using a majorization technique. Stress majorization, also
+    known as the Guttman Transform, guarantees a monotone convergence of
+    stress, and is more powerful than traditional techniques such as gradient
+    descent.
+
+    The SMACOF algorithm for metric MDS can be summarized by the following
+    steps:
+
+    1. Set an initial start configuration, randomly or not.
+    2. Compute the stress
+    3. Compute the Guttman Transform
+    4. Iterate 2 and 3 until convergence.
+
+    The nonmetric algorithm adds a monotonic regression step before computing
+    the stress.
+
+    Parameters
+    ----------
+    dissimilarities : array-like of shape (n_samples, n_samples)
+        Pairwise dissimilarities between the points. Must be symmetric.
+
+    metric : bool, default=True
+        Compute metric or nonmetric SMACOF algorithm.
+        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as
+        missing values.
+
+    n_components : int, default=2
+        Number of dimensions in which to immerse the dissimilarities. If an
+        ``init`` array is provided, this option is overridden and the shape of
+        ``init`` is used to determine the dimensionality of the embedding
+        space.
+
+    init : array-like of shape (n_samples, n_components), default=None
+        Starting configuration of the embedding to initialize the algorithm. By
+        default, the algorithm is initialized with a randomly chosen array.
+
+    n_init : int, default=8
+        Number of times the SMACOF algorithm will be run with different
+        initializations. The final results will be the best output of the runs,
+        determined by the run with the smallest final stress. If ``init`` is
+        provided, this option is overridden and a single run is performed.
+
+    n_jobs : int, default=None
+        The number of jobs to use for the computation. If multiple
+        initializations are used (``n_init``), each run of the algorithm is
+        computed in parallel.
+
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    max_iter : int, default=300
+        Maximum number of iterations of the SMACOF algorithm for a single run.
+
+    verbose : int, default=0
+        Level of verbosity.
+
+    eps : float, default=1e-3
+        Relative tolerance with respect to stress at which to declare
+        convergence. The value of `eps` should be tuned separately depending
+        on whether or not `normalized_stress` is being used.
+
+    random_state : int, RandomState instance or None, default=None
+        Determines the random number generator used to initialize the centers.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    return_n_iter : bool, default=False
+        Whether or not to return the number of iterations.
+
+    normalized_stress : bool or "auto" default="auto"
+        Whether use and return normed stress value (Stress-1) instead of raw
+        stress calculated by default. Only supported in non-metric MDS.
+
+        .. versionadded:: 1.2
+
+        .. versionchanged:: 1.4
+           The default value changed from `False` to `"auto"` in version 1.4.
+
+    Returns
+    -------
+    X : ndarray of shape (n_samples, n_components)
+        Coordinates of the points in a ``n_components``-space.
+
+    stress : float
+        The final value of the stress (sum of squared distance of the
+        disparities and the distances for all constrained points).
+        If `normalized_stress=True`, and `metric=False` returns Stress-1.
+        A value of 0 indicates "perfect" fit, 0.025 excellent, 0.05 good,
+        0.1 fair, and 0.2 poor [1]_.
+
+    n_iter : int
+        The number of iterations corresponding to the best stress. Returned
+        only if ``return_n_iter`` is set to ``True``.
+
+    References
+    ----------
+    .. [1] "Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
+           Psychometrika, 29 (1964)
+
+    .. [2] "Multidimensional scaling by optimizing goodness of fit to a nonmetric
+           hypothesis" Kruskal, J. Psychometrika, 29, (1964)
+
+    .. [3] "Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
+           Groenen P. Springer Series in Statistics (1997)
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.manifold import smacof
+    >>> from sklearn.metrics import euclidean_distances
+    >>> X = np.array([[0, 1, 2], [1, 0, 3],[2, 3, 0]])
+    >>> dissimilarities = euclidean_distances(X)
+    >>> mds_result, stress = smacof(dissimilarities, n_components=2, random_state=42)
+    >>> mds_result
+    array([[ 0.05... -1.07... ],
+           [ 1.74..., -0.75...],
+           [-1.79...,  1.83...]])
+    >>> stress
+    0.0012...
+    """
+
+    dissimilarities = check_array(dissimilarities)
+    random_state = check_random_state(random_state)
+
+    if normalized_stress == "auto":
+        normalized_stress = not metric
+
+    if normalized_stress and metric:
+        raise ValueError(
+            "Normalized stress is not supported for metric MDS. Either set"
+            " `normalized_stress=False` or use `metric=False`."
+        )
+    if hasattr(init, "__array__"):
+        init = np.asarray(init).copy()
+        if not n_init == 1:
+            warnings.warn(
+                "Explicit initial positions passed: "
+                "performing only one init of the MDS instead of %d" % n_init
+            )
+            n_init = 1
+
+    best_pos, best_stress = None, None
+
+    if effective_n_jobs(n_jobs) == 1:
+        for it in range(n_init):
+            pos, stress, n_iter_ = _smacof_single(
+                dissimilarities,
+                metric=metric,
+                n_components=n_components,
+                init=init,
+                max_iter=max_iter,
+                verbose=verbose,
+                eps=eps,
+                random_state=random_state,
+                normalized_stress=normalized_stress,
+            )
+            if best_stress is None or stress < best_stress:
+                best_stress = stress
+                best_pos = pos.copy()
+                best_iter = n_iter_
+    else:
+        seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
+        results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
+            delayed(_smacof_single)(
+                dissimilarities,
+                metric=metric,
+                n_components=n_components,
+                init=init,
+                max_iter=max_iter,
+                verbose=verbose,
+                eps=eps,
+                random_state=seed,
+                normalized_stress=normalized_stress,
+            )
+            for seed in seeds
+        )
+        positions, stress, n_iters = zip(*results)
+        best = np.argmin(stress)
+        best_stress = stress[best]
+        best_pos = positions[best]
+        best_iter = n_iters[best]
+
+    if return_n_iter:
+        return best_pos, best_stress, best_iter
+    else:
+        return best_pos, best_stress
+
+
+class MDS(BaseEstimator):
+    """Multidimensional scaling.
+
+    Read more in the :ref:`User Guide <multidimensional_scaling>`.
+
+    Parameters
+    ----------
+    n_components : int, default=2
+        Number of dimensions in which to immerse the dissimilarities.
+
+    metric : bool, default=True
+        If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.
+        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as
+        missing values.
+
+    n_init : int, default=4
+        Number of times the SMACOF algorithm will be run with different
+        initializations. The final results will be the best output of the runs,
+        determined by the run with the smallest final stress.
+
+    max_iter : int, default=300
+        Maximum number of iterations of the SMACOF algorithm for a single run.
+
+    verbose : int, default=0
+        Level of verbosity.
+
+    eps : float, default=1e-3
+        Relative tolerance with respect to stress at which to declare
+        convergence. The value of `eps` should be tuned separately depending
+        on whether or not `normalized_stress` is being used.
+
+    n_jobs : int, default=None
+        The number of jobs to use for the computation. If multiple
+        initializations are used (``n_init``), each run of the algorithm is
+        computed in parallel.
+
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    random_state : int, RandomState instance or None, default=None
+        Determines the random number generator used to initialize the centers.
+        Pass an int for reproducible results across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'
+        Dissimilarity measure to use:
+
+        - 'euclidean':
+            Pairwise Euclidean distances between points in the dataset.
+
+        - 'precomputed':
+            Pre-computed dissimilarities are passed directly to ``fit`` and
+            ``fit_transform``.
+
+    normalized_stress : bool or "auto" default="auto"
+        Whether use and return normed stress value (Stress-1) instead of raw
+        stress calculated by default. Only supported in non-metric MDS.
+
+        .. versionadded:: 1.2
+
+        .. versionchanged:: 1.4
+           The default value changed from `False` to `"auto"` in version 1.4.
+
+    Attributes
+    ----------
+    embedding_ : ndarray of shape (n_samples, n_components)
+        Stores the position of the dataset in the embedding space.
+
+    stress_ : float
+        The final value of the stress (sum of squared distance of the
+        disparities and the distances for all constrained points).
+        If `normalized_stress=True`, and `metric=False` returns Stress-1.
+        A value of 0 indicates "perfect" fit, 0.025 excellent, 0.05 good,
+        0.1 fair, and 0.2 poor [1]_.
+
+    dissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)
+        Pairwise dissimilarities between the points. Symmetric matrix that:
+
+        - either uses a custom dissimilarity matrix by setting `dissimilarity`
+          to 'precomputed';
+        - or constructs a dissimilarity matrix from data using
+          Euclidean distances.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    n_iter_ : int
+        The number of iterations corresponding to the best stress.
+
+    See Also
+    --------
+    sklearn.decomposition.PCA : Principal component analysis that is a linear
+        dimensionality reduction method.
+    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using
+        kernels and PCA.
+    TSNE : T-distributed Stochastic Neighbor Embedding.
+    Isomap : Manifold learning based on Isometric Mapping.
+    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.
+    SpectralEmbedding : Spectral embedding for non-linear dimensionality.
+
+    References
+    ----------
+    .. [1] "Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
+       Psychometrika, 29 (1964)
+
+    .. [2] "Multidimensional scaling by optimizing goodness of fit to a nonmetric
+       hypothesis" Kruskal, J. Psychometrika, 29, (1964)
+
+    .. [3] "Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
+       Groenen P. Springer Series in Statistics (1997)
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import MDS
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding = MDS(n_components=2, normalized_stress='auto')
+    >>> X_transformed = embedding.fit_transform(X[:100])
+    >>> X_transformed.shape
+    (100, 2)
+
+    For a more detailed example of usage, see:
+    :ref:`sphx_glr_auto_examples_manifold_plot_mds.py`
+    """
+
+    _parameter_constraints: dict = {
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "metric": ["boolean"],
+        "n_init": [Interval(Integral, 1, None, closed="left")],
+        "max_iter": [Interval(Integral, 1, None, closed="left")],
+        "verbose": ["verbose"],
+        "eps": [Interval(Real, 0.0, None, closed="left")],
+        "n_jobs": [None, Integral],
+        "random_state": ["random_state"],
+        "dissimilarity": [StrOptions({"euclidean", "precomputed"})],
+        "normalized_stress": ["boolean", StrOptions({"auto"})],
+    }
+
+    def __init__(
+        self,
+        n_components=2,
+        *,
+        metric=True,
+        n_init=4,
+        max_iter=300,
+        verbose=0,
+        eps=1e-3,
+        n_jobs=None,
+        random_state=None,
+        dissimilarity="euclidean",
+        normalized_stress="auto",
+    ):
+        self.n_components = n_components
+        self.dissimilarity = dissimilarity
+        self.metric = metric
+        self.n_init = n_init
+        self.max_iter = max_iter
+        self.eps = eps
+        self.verbose = verbose
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.normalized_stress = normalized_stress
+
+    def _more_tags(self):
+        return {"pairwise": self.dissimilarity == "precomputed"}
+
+    def fit(self, X, y=None, init=None):
+        """
+        Compute the position of the points in the embedding space.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features) or \
+                (n_samples, n_samples)
+            Input data. If ``dissimilarity=='precomputed'``, the input should
+            be the dissimilarity matrix.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        init : ndarray of shape (n_samples, n_components), default=None
+            Starting configuration of the embedding to initialize the SMACOF
+            algorithm. By default, the algorithm is initialized with a randomly
+            chosen array.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+        """
+        self.fit_transform(X, init=init)
+        return self
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit_transform(self, X, y=None, init=None):
+        """
+        Fit the data from `X`, and returns the embedded coordinates.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features) or \
+                (n_samples, n_samples)
+            Input data. If ``dissimilarity=='precomputed'``, the input should
+            be the dissimilarity matrix.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        init : ndarray of shape (n_samples, n_components), default=None
+            Starting configuration of the embedding to initialize the SMACOF
+            algorithm. By default, the algorithm is initialized with a randomly
+            chosen array.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components)
+            X transformed in the new space.
+        """
+        X = self._validate_data(X)
+        if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
+            warnings.warn(
+                "The MDS API has changed. ``fit`` now constructs an"
+                " dissimilarity matrix from data. To use a custom "
+                "dissimilarity matrix, set "
+                "``dissimilarity='precomputed'``."
+            )
+
+        if self.dissimilarity == "precomputed":
+            self.dissimilarity_matrix_ = X
+        elif self.dissimilarity == "euclidean":
+            self.dissimilarity_matrix_ = euclidean_distances(X)
+
+        self.embedding_, self.stress_, self.n_iter_ = smacof(
+            self.dissimilarity_matrix_,
+            metric=self.metric,
+            n_components=self.n_components,
+            init=init,
+            n_init=self.n_init,
+            n_jobs=self.n_jobs,
+            max_iter=self.max_iter,
+            verbose=self.verbose,
+            eps=self.eps,
+            random_state=self.random_state,
+            return_n_iter=True,
+            normalized_stress=self.normalized_stress,
+        )
+
+        return self.embedding_
@@ -0,0 +1,778 @@
+"""Spectral Embedding."""
+
+# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
+#         Wei LI <kuantkid@gmail.com>
+# License: BSD 3 clause
+
+
+import warnings
+from numbers import Integral, Real
+
+import numpy as np
+from scipy import sparse
+from scipy.linalg import eigh
+from scipy.sparse.csgraph import connected_components
+from scipy.sparse.linalg import eigsh, lobpcg
+
+from ..base import BaseEstimator, _fit_context
+from ..metrics.pairwise import rbf_kernel
+from ..neighbors import NearestNeighbors, kneighbors_graph
+from ..utils import (
+    check_array,
+    check_random_state,
+    check_symmetric,
+)
+from ..utils._arpack import _init_arpack_v0
+from ..utils._param_validation import Interval, StrOptions, validate_params
+from ..utils.extmath import _deterministic_vector_sign_flip
+from ..utils.fixes import laplacian as csgraph_laplacian
+from ..utils.fixes import parse_version, sp_version
+
+
+def _graph_connected_component(graph, node_id):
+    """Find the largest graph connected components that contains one
+    given node.
+
+    Parameters
+    ----------
+    graph : array-like of shape (n_samples, n_samples)
+        Adjacency matrix of the graph, non-zero weight means an edge
+        between the nodes.
+
+    node_id : int
+        The index of the query node of the graph.
+
+    Returns
+    -------
+    connected_components_matrix : array-like of shape (n_samples,)
+        An array of bool value indicating the indexes of the nodes
+        belonging to the largest connected components of the given query
+        node.
+    """
+    n_node = graph.shape[0]
+    if sparse.issparse(graph):
+        # speed up row-wise access to boolean connection mask
+        graph = graph.tocsr()
+    connected_nodes = np.zeros(n_node, dtype=bool)
+    nodes_to_explore = np.zeros(n_node, dtype=bool)
+    nodes_to_explore[node_id] = True
+    for _ in range(n_node):
+        last_num_component = connected_nodes.sum()
+        np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)
+        if last_num_component >= connected_nodes.sum():
+            break
+        indices = np.where(nodes_to_explore)[0]
+        nodes_to_explore.fill(False)
+        for i in indices:
+            if sparse.issparse(graph):
+                # scipy not yet implemented 1D sparse slices; can be changed back to
+                # `neighbors = graph[i].toarray().ravel()` once implemented
+                neighbors = graph[[i], :].toarray().ravel()
+            else:
+                neighbors = graph[i]
+            np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)
+    return connected_nodes
+
+
+def _graph_is_connected(graph):
+    """Return whether the graph is connected (True) or Not (False).
+
+    Parameters
+    ----------
+    graph : {array-like, sparse matrix} of shape (n_samples, n_samples)
+        Adjacency matrix of the graph, non-zero weight means an edge
+        between the nodes.
+
+    Returns
+    -------
+    is_connected : bool
+        True means the graph is fully connected and False means not.
+    """
+    if sparse.issparse(graph):
+        # Before Scipy 1.11.3, `connected_components` only supports 32-bit indices.
+        # PR: https://github.com/scipy/scipy/pull/18913
+        # First integration in 1.11.3: https://github.com/scipy/scipy/pull/19279
+        # TODO(jjerphan): Once SciPy 1.11.3 is the minimum supported version, use
+        # `accept_large_sparse=True`.
+        accept_large_sparse = sp_version >= parse_version("1.11.3")
+        graph = check_array(
+            graph, accept_sparse=True, accept_large_sparse=accept_large_sparse
+        )
+        # sparse graph, find all the connected components
+        n_connected_components, _ = connected_components(graph)
+        return n_connected_components == 1
+    else:
+        # dense graph, find all connected components start from node 0
+        return _graph_connected_component(graph, 0).sum() == graph.shape[0]
+
+
+def _set_diag(laplacian, value, norm_laplacian):
+    """Set the diagonal of the laplacian matrix and convert it to a
+    sparse format well suited for eigenvalue decomposition.
+
+    Parameters
+    ----------
+    laplacian : {ndarray, sparse matrix}
+        The graph laplacian.
+
+    value : float
+        The value of the diagonal.
+
+    norm_laplacian : bool
+        Whether the value of the diagonal should be changed or not.
+
+    Returns
+    -------
+    laplacian : {array, sparse matrix}
+        An array of matrix in a form that is well suited to fast
+        eigenvalue decomposition, depending on the band width of the
+        matrix.
+    """
+    n_nodes = laplacian.shape[0]
+    # We need all entries in the diagonal to values
+    if not sparse.issparse(laplacian):
+        if norm_laplacian:
+            laplacian.flat[:: n_nodes + 1] = value
+    else:
+        laplacian = laplacian.tocoo()
+        if norm_laplacian:
+            diag_idx = laplacian.row == laplacian.col
+            laplacian.data[diag_idx] = value
+        # If the matrix has a small number of diagonals (as in the
+        # case of structured matrices coming from images), the
+        # dia format might be best suited for matvec products:
+        n_diags = np.unique(laplacian.row - laplacian.col).size
+        if n_diags <= 7:
+            # 3 or less outer diagonals on each side
+            laplacian = laplacian.todia()
+        else:
+            # csr has the fastest matvec and is thus best suited to
+            # arpack
+            laplacian = laplacian.tocsr()
+    return laplacian
+
+
+@validate_params(
+    {
+        "adjacency": ["array-like", "sparse matrix"],
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "eigen_solver": [StrOptions({"arpack", "lobpcg", "amg"}), None],
+        "random_state": ["random_state"],
+        "eigen_tol": [Interval(Real, 0, None, closed="left"), StrOptions({"auto"})],
+        "norm_laplacian": ["boolean"],
+        "drop_first": ["boolean"],
+    },
+    prefer_skip_nested_validation=True,
+)
+def spectral_embedding(
+    adjacency,
+    *,
+    n_components=8,
+    eigen_solver=None,
+    random_state=None,
+    eigen_tol="auto",
+    norm_laplacian=True,
+    drop_first=True,
+):
+    """Project the sample on the first eigenvectors of the graph Laplacian.
+
+    The adjacency matrix is used to compute a normalized graph Laplacian
+    whose spectrum (especially the eigenvectors associated to the
+    smallest eigenvalues) has an interpretation in terms of minimal
+    number of cuts necessary to split the graph into comparably sized
+    components.
+
+    This embedding can also 'work' even if the ``adjacency`` variable is
+    not strictly the adjacency matrix of a graph but more generally
+    an affinity or similarity matrix between samples (for instance the
+    heat kernel of a euclidean distance matrix or a k-NN matrix).
+
+    However care must taken to always make the affinity matrix symmetric
+    so that the eigenvector decomposition works as expected.
+
+    Note : Laplacian Eigenmaps is the actual algorithm implemented here.
+
+    Read more in the :ref:`User Guide <spectral_embedding>`.
+
+    Parameters
+    ----------
+    adjacency : {array-like, sparse graph} of shape (n_samples, n_samples)
+        The adjacency matrix of the graph to embed.
+
+    n_components : int, default=8
+        The dimension of the projection subspace.
+
+    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None
+        The eigenvalue decomposition strategy to use. AMG requires pyamg
+        to be installed. It can be faster on very large, sparse problems,
+        but may also lead to instabilities. If None, then ``'arpack'`` is
+        used.
+
+    random_state : int, RandomState instance or None, default=None
+        A pseudo random number generator used for the initialization
+        of the lobpcg eigen vectors decomposition when `eigen_solver ==
+        'amg'`, and for the K-Means initialization. Use an int to make
+        the results deterministic across calls (See
+        :term:`Glossary <random_state>`).
+
+        .. note::
+            When using `eigen_solver == 'amg'`,
+            it is necessary to also fix the global numpy seed with
+            `np.random.seed(int)` to get deterministic results. See
+            https://github.com/pyamg/pyamg/issues/139 for further
+            information.
+
+    eigen_tol : float, default="auto"
+        Stopping criterion for eigendecomposition of the Laplacian matrix.
+        If `eigen_tol="auto"` then the passed tolerance will depend on the
+        `eigen_solver`:
+
+        - If `eigen_solver="arpack"`, then `eigen_tol=0.0`;
+        - If `eigen_solver="lobpcg"` or `eigen_solver="amg"`, then
+          `eigen_tol=None` which configures the underlying `lobpcg` solver to
+          automatically resolve the value according to their heuristics. See,
+          :func:`scipy.sparse.linalg.lobpcg` for details.
+
+        Note that when using `eigen_solver="amg"` values of `tol<1e-5` may lead
+        to convergence issues and should be avoided.
+
+        .. versionadded:: 1.2
+           Added 'auto' option.
+
+    norm_laplacian : bool, default=True
+        If True, then compute symmetric normalized Laplacian.
+
+    drop_first : bool, default=True
+        Whether to drop the first eigenvector. For spectral embedding, this
+        should be True as the first eigenvector should be constant vector for
+        connected graph, but for spectral clustering, this should be kept as
+        False to retain the first eigenvector.
+
+    Returns
+    -------
+    embedding : ndarray of shape (n_samples, n_components)
+        The reduced samples.
+
+    Notes
+    -----
+    Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph
+    has one connected component. If there graph has many components, the first
+    few eigenvectors will simply uncover the connected components of the graph.
+
+    References
+    ----------
+    * https://en.wikipedia.org/wiki/LOBPCG
+
+    * :doi:`"Toward the Optimal Preconditioned Eigensolver: Locally Optimal
+      Block Preconditioned Conjugate Gradient Method",
+      Andrew V. Knyazev
+      <10.1137/S1064827500366124>`
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.neighbors import kneighbors_graph
+    >>> from sklearn.manifold import spectral_embedding
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X = X[:100]
+    >>> affinity_matrix = kneighbors_graph(
+    ...     X, n_neighbors=int(X.shape[0] / 10), include_self=True
+    ... )
+    >>> # make the matrix symmetric
+    >>> affinity_matrix = 0.5 * (affinity_matrix + affinity_matrix.T)
+    >>> embedding = spectral_embedding(affinity_matrix, n_components=2, random_state=42)
+    >>> embedding.shape
+    (100, 2)
+    """
+    random_state = check_random_state(random_state)
+
+    return _spectral_embedding(
+        adjacency,
+        n_components=n_components,
+        eigen_solver=eigen_solver,
+        random_state=random_state,
+        eigen_tol=eigen_tol,
+        norm_laplacian=norm_laplacian,
+        drop_first=drop_first,
+    )
+
+
+def _spectral_embedding(
+    adjacency,
+    *,
+    n_components=8,
+    eigen_solver=None,
+    random_state=None,
+    eigen_tol="auto",
+    norm_laplacian=True,
+    drop_first=True,
+):
+    adjacency = check_symmetric(adjacency)
+
+    if eigen_solver == "amg":
+        try:
+            from pyamg import smoothed_aggregation_solver
+        except ImportError as e:
+            raise ValueError(
+                "The eigen_solver was set to 'amg', but pyamg is not available."
+            ) from e
+
+    if eigen_solver is None:
+        eigen_solver = "arpack"
+
+    n_nodes = adjacency.shape[0]
+    # Whether to drop the first eigenvector
+    if drop_first:
+        n_components = n_components + 1
+
+    if not _graph_is_connected(adjacency):
+        warnings.warn(
+            "Graph is not fully connected, spectral embedding may not work as expected."
+        )
+
+    laplacian, dd = csgraph_laplacian(
+        adjacency, normed=norm_laplacian, return_diag=True
+    )
+    if (
+        eigen_solver == "arpack"
+        or eigen_solver != "lobpcg"
+        and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
+    ):
+        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
+        # for details see the source code in scipy:
+        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
+        # /lobpcg/lobpcg.py#L237
+        # or matlab:
+        # https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
+        laplacian = _set_diag(laplacian, 1, norm_laplacian)
+
+        # Here we'll use shift-invert mode for fast eigenvalues
+        # (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
+        #  for a short explanation of what this means)
+        # Because the normalized Laplacian has eigenvalues between 0 and 2,
+        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
+        # when finding eigenvalues of largest magnitude (keyword which='LM')
+        # and when these eigenvalues are very large compared to the rest.
+        # For very large, very sparse graphs, I - L can have many, many
+        # eigenvalues very near 1.0.  This leads to slow convergence.  So
+        # instead, we'll use ARPACK's shift-invert mode, asking for the
+        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
+        # near 1.0 and leads to much faster convergence: potentially an
+        # orders-of-magnitude speedup over simply using keyword which='LA'
+        # in standard mode.
+        try:
+            # We are computing the opposite of the laplacian inplace so as
+            # to spare a memory allocation of a possibly very large array
+            tol = 0 if eigen_tol == "auto" else eigen_tol
+            laplacian *= -1
+            v0 = _init_arpack_v0(laplacian.shape[0], random_state)
+            laplacian = check_array(
+                laplacian, accept_sparse="csr", accept_large_sparse=False
+            )
+            _, diffusion_map = eigsh(
+                laplacian, k=n_components, sigma=1.0, which="LM", tol=tol, v0=v0
+            )
+            embedding = diffusion_map.T[n_components::-1]
+            if norm_laplacian:
+                # recover u = D^-1/2 x from the eigenvector output x
+                embedding = embedding / dd
+        except RuntimeError:
+            # When submatrices are exactly singular, an LU decomposition
+            # in arpack fails. We fallback to lobpcg
+            eigen_solver = "lobpcg"
+            # Revert the laplacian to its opposite to have lobpcg work
+            laplacian *= -1
+
+    elif eigen_solver == "amg":
+        # Use AMG to get a preconditioner and speed up the eigenvalue
+        # problem.
+        if not sparse.issparse(laplacian):
+            warnings.warn("AMG works better for sparse matrices")
+        laplacian = check_array(
+            laplacian, dtype=[np.float64, np.float32], accept_sparse=True
+        )
+        laplacian = _set_diag(laplacian, 1, norm_laplacian)
+
+        # The Laplacian matrix is always singular, having at least one zero
+        # eigenvalue, corresponding to the trivial eigenvector, which is a
+        # constant. Using a singular matrix for preconditioning may result in
+        # random failures in LOBPCG and is not supported by the existing
+        # theory:
+        #     see https://doi.org/10.1007/s10208-015-9297-1
+        # Shift the Laplacian so its diagononal is not all ones. The shift
+        # does change the eigenpairs however, so we'll feed the shifted
+        # matrix to the solver and afterward set it back to the original.
+        diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])
+        laplacian += diag_shift
+        if hasattr(sparse, "csr_array") and isinstance(laplacian, sparse.csr_array):
+            # `pyamg` does not work with `csr_array` and we need to convert it to a
+            # `csr_matrix` object.
+            laplacian = sparse.csr_matrix(laplacian)
+        ml = smoothed_aggregation_solver(check_array(laplacian, accept_sparse="csr"))
+        laplacian -= diag_shift
+
+        M = ml.aspreconditioner()
+        # Create initial approximation X to eigenvectors
+        X = random_state.standard_normal(size=(laplacian.shape[0], n_components + 1))
+        X[:, 0] = dd.ravel()
+        X = X.astype(laplacian.dtype)
+
+        tol = None if eigen_tol == "auto" else eigen_tol
+        _, diffusion_map = lobpcg(laplacian, X, M=M, tol=tol, largest=False)
+        embedding = diffusion_map.T
+        if norm_laplacian:
+            # recover u = D^-1/2 x from the eigenvector output x
+            embedding = embedding / dd
+        if embedding.shape[0] == 1:
+            raise ValueError
+
+    if eigen_solver == "lobpcg":
+        laplacian = check_array(
+            laplacian, dtype=[np.float64, np.float32], accept_sparse=True
+        )
+        if n_nodes < 5 * n_components + 1:
+            # see note above under arpack why lobpcg has problems with small
+            # number of nodes
+            # lobpcg will fallback to eigh, so we short circuit it
+            if sparse.issparse(laplacian):
+                laplacian = laplacian.toarray()
+            _, diffusion_map = eigh(laplacian, check_finite=False)
+            embedding = diffusion_map.T[:n_components]
+            if norm_laplacian:
+                # recover u = D^-1/2 x from the eigenvector output x
+                embedding = embedding / dd
+        else:
+            laplacian = _set_diag(laplacian, 1, norm_laplacian)
+            # We increase the number of eigenvectors requested, as lobpcg
+            # doesn't behave well in low dimension and create initial
+            # approximation X to eigenvectors
+            X = random_state.standard_normal(
+                size=(laplacian.shape[0], n_components + 1)
+            )
+            X[:, 0] = dd.ravel()
+            X = X.astype(laplacian.dtype)
+            tol = None if eigen_tol == "auto" else eigen_tol
+            _, diffusion_map = lobpcg(
+                laplacian, X, tol=tol, largest=False, maxiter=2000
+            )
+            embedding = diffusion_map.T[:n_components]
+            if norm_laplacian:
+                # recover u = D^-1/2 x from the eigenvector output x
+                embedding = embedding / dd
+            if embedding.shape[0] == 1:
+                raise ValueError
+
+    embedding = _deterministic_vector_sign_flip(embedding)
+    if drop_first:
+        return embedding[1:n_components].T
+    else:
+        return embedding[:n_components].T
+
+
+class SpectralEmbedding(BaseEstimator):
+    """Spectral embedding for non-linear dimensionality reduction.
+
+    Forms an affinity matrix given by the specified function and
+    applies spectral decomposition to the corresponding graph laplacian.
+    The resulting transformation is given by the value of the
+    eigenvectors for each data point.
+
+    Note : Laplacian Eigenmaps is the actual algorithm implemented here.
+
+    Read more in the :ref:`User Guide <spectral_embedding>`.
+
+    Parameters
+    ----------
+    n_components : int, default=2
+        The dimension of the projected subspace.
+
+    affinity : {'nearest_neighbors', 'rbf', 'precomputed', \
+                'precomputed_nearest_neighbors'} or callable, \
+                default='nearest_neighbors'
+        How to construct the affinity matrix.
+         - 'nearest_neighbors' : construct the affinity matrix by computing a
+           graph of nearest neighbors.
+         - 'rbf' : construct the affinity matrix by computing a radial basis
+           function (RBF) kernel.
+         - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.
+         - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph
+           of precomputed nearest neighbors, and constructs the affinity matrix
+           by selecting the ``n_neighbors`` nearest neighbors.
+         - callable : use passed in function as affinity
+           the function takes in data matrix (n_samples, n_features)
+           and return affinity matrix (n_samples, n_samples).
+
+    gamma : float, default=None
+        Kernel coefficient for rbf kernel. If None, gamma will be set to
+        1/n_features.
+
+    random_state : int, RandomState instance or None, default=None
+        A pseudo random number generator used for the initialization
+        of the lobpcg eigen vectors decomposition when `eigen_solver ==
+        'amg'`, and for the K-Means initialization. Use an int to make
+        the results deterministic across calls (See
+        :term:`Glossary <random_state>`).
+
+        .. note::
+            When using `eigen_solver == 'amg'`,
+            it is necessary to also fix the global numpy seed with
+            `np.random.seed(int)` to get deterministic results. See
+            https://github.com/pyamg/pyamg/issues/139 for further
+            information.
+
+    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None
+        The eigenvalue decomposition strategy to use. AMG requires pyamg
+        to be installed. It can be faster on very large, sparse problems.
+        If None, then ``'arpack'`` is used.
+
+    eigen_tol : float, default="auto"
+        Stopping criterion for eigendecomposition of the Laplacian matrix.
+        If `eigen_tol="auto"` then the passed tolerance will depend on the
+        `eigen_solver`:
+
+        - If `eigen_solver="arpack"`, then `eigen_tol=0.0`;
+        - If `eigen_solver="lobpcg"` or `eigen_solver="amg"`, then
+          `eigen_tol=None` which configures the underlying `lobpcg` solver to
+          automatically resolve the value according to their heuristics. See,
+          :func:`scipy.sparse.linalg.lobpcg` for details.
+
+        Note that when using `eigen_solver="lobpcg"` or `eigen_solver="amg"`
+        values of `tol<1e-5` may lead to convergence issues and should be
+        avoided.
+
+        .. versionadded:: 1.2
+
+    n_neighbors : int, default=None
+        Number of nearest neighbors for nearest_neighbors graph building.
+        If None, n_neighbors will be set to max(n_samples/10, 1).
+
+    n_jobs : int, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Attributes
+    ----------
+    embedding_ : ndarray of shape (n_samples, n_components)
+        Spectral embedding of the training matrix.
+
+    affinity_matrix_ : ndarray of shape (n_samples, n_samples)
+        Affinity_matrix constructed from samples or precomputed.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    n_neighbors_ : int
+        Number of nearest neighbors effectively used.
+
+    See Also
+    --------
+    Isomap : Non-linear dimensionality reduction through Isometric Mapping.
+
+    References
+    ----------
+
+    - :doi:`A Tutorial on Spectral Clustering, 2007
+      Ulrike von Luxburg
+      <10.1007/s11222-007-9033-z>`
+
+    - `On Spectral Clustering: Analysis and an algorithm, 2001
+      Andrew Y. Ng, Michael I. Jordan, Yair Weiss
+      <https://citeseerx.ist.psu.edu/doc_view/pid/796c5d6336fc52aa84db575fb821c78918b65f58>`_
+
+    - :doi:`Normalized cuts and image segmentation, 2000
+      Jianbo Shi, Jitendra Malik
+      <10.1109/34.868688>`
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import SpectralEmbedding
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding = SpectralEmbedding(n_components=2)
+    >>> X_transformed = embedding.fit_transform(X[:100])
+    >>> X_transformed.shape
+    (100, 2)
+    """
+
+    _parameter_constraints: dict = {
+        "n_components": [Interval(Integral, 1, None, closed="left")],
+        "affinity": [
+            StrOptions(
+                {
+                    "nearest_neighbors",
+                    "rbf",
+                    "precomputed",
+                    "precomputed_nearest_neighbors",
+                },
+            ),
+            callable,
+        ],
+        "gamma": [Interval(Real, 0, None, closed="left"), None],
+        "random_state": ["random_state"],
+        "eigen_solver": [StrOptions({"arpack", "lobpcg", "amg"}), None],
+        "eigen_tol": [Interval(Real, 0, None, closed="left"), StrOptions({"auto"})],
+        "n_neighbors": [Interval(Integral, 1, None, closed="left"), None],
+        "n_jobs": [None, Integral],
+    }
+
+    def __init__(
+        self,
+        n_components=2,
+        *,
+        affinity="nearest_neighbors",
+        gamma=None,
+        random_state=None,
+        eigen_solver=None,
+        eigen_tol="auto",
+        n_neighbors=None,
+        n_jobs=None,
+    ):
+        self.n_components = n_components
+        self.affinity = affinity
+        self.gamma = gamma
+        self.random_state = random_state
+        self.eigen_solver = eigen_solver
+        self.eigen_tol = eigen_tol
+        self.n_neighbors = n_neighbors
+        self.n_jobs = n_jobs
+
+    def _more_tags(self):
+        return {
+            "pairwise": self.affinity
+            in [
+                "precomputed",
+                "precomputed_nearest_neighbors",
+            ]
+        }
+
+    def _get_affinity_matrix(self, X, Y=None):
+        """Calculate the affinity matrix from data
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training vector, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+            If affinity is "precomputed"
+            X : array-like of shape (n_samples, n_samples),
+            Interpret X as precomputed adjacency graph computed from
+            samples.
+
+        Y: Ignored
+
+        Returns
+        -------
+        affinity_matrix of shape (n_samples, n_samples)
+        """
+        if self.affinity == "precomputed":
+            self.affinity_matrix_ = X
+            return self.affinity_matrix_
+        if self.affinity == "precomputed_nearest_neighbors":
+            estimator = NearestNeighbors(
+                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric="precomputed"
+            ).fit(X)
+            connectivity = estimator.kneighbors_graph(X=X, mode="connectivity")
+            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
+            return self.affinity_matrix_
+        if self.affinity == "nearest_neighbors":
+            if sparse.issparse(X):
+                warnings.warn(
+                    "Nearest neighbors affinity currently does "
+                    "not support sparse input, falling back to "
+                    "rbf affinity"
+                )
+                self.affinity = "rbf"
+            else:
+                self.n_neighbors_ = (
+                    self.n_neighbors
+                    if self.n_neighbors is not None
+                    else max(int(X.shape[0] / 10), 1)
+                )
+                self.affinity_matrix_ = kneighbors_graph(
+                    X, self.n_neighbors_, include_self=True, n_jobs=self.n_jobs
+                )
+                # currently only symmetric affinity_matrix supported
+                self.affinity_matrix_ = 0.5 * (
+                    self.affinity_matrix_ + self.affinity_matrix_.T
+                )
+                return self.affinity_matrix_
+        if self.affinity == "rbf":
+            self.gamma_ = self.gamma if self.gamma is not None else 1.0 / X.shape[1]
+            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
+            return self.affinity_matrix_
+        self.affinity_matrix_ = self.affinity(X)
+        return self.affinity_matrix_
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit(self, X, y=None):
+        """Fit the model from data in X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training vector, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+            If affinity is "precomputed"
+            X : {array-like, sparse matrix}, shape (n_samples, n_samples),
+            Interpret X as precomputed adjacency graph computed from
+            samples.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+        X = self._validate_data(X, accept_sparse="csr", ensure_min_samples=2)
+
+        random_state = check_random_state(self.random_state)
+
+        affinity_matrix = self._get_affinity_matrix(X)
+        self.embedding_ = _spectral_embedding(
+            affinity_matrix,
+            n_components=self.n_components,
+            eigen_solver=self.eigen_solver,
+            eigen_tol=self.eigen_tol,
+            random_state=random_state,
+        )
+        return self
+
+    def fit_transform(self, X, y=None):
+        """Fit the model from data in X and transform X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training vector, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+            If affinity is "precomputed"
+            X : {array-like, sparse matrix} of shape (n_samples, n_samples),
+            Interpret X as precomputed adjacency graph computed from
+            samples.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        Returns
+        -------
+        X_new : array-like of shape (n_samples, n_components)
+            Spectral embedding of the training matrix.
+        """
+        self.fit(X)
+        return self.embedding_
@@ -0,0 +1,120 @@
+import numpy as np
+
+from libc cimport math
+from libc.math cimport INFINITY
+
+from ..utils._typedefs cimport float32_t, float64_t
+
+
+cdef float EPSILON_DBL = 1e-8
+cdef float PERPLEXITY_TOLERANCE = 1e-5
+
+
+# TODO: have this function support float32 and float64 and preserve inputs' dtypes.
+def _binary_search_perplexity(
+        const float32_t[:, :] sqdistances,
+        float desired_perplexity,
+        int verbose):
+    """Binary search for sigmas of conditional Gaussians.
+
+    This approximation reduces the computational complexity from O(N^2) to
+    O(uN).
+
+    Parameters
+    ----------
+    sqdistances : ndarray of shape (n_samples, n_neighbors), dtype=np.float32
+        Distances between training samples and their k nearest neighbors.
+        When using the exact method, this is a square (n_samples, n_samples)
+        distance matrix. The TSNE default metric is "euclidean" which is
+        interpreted as squared euclidean distance.
+
+    desired_perplexity : float
+        Desired perplexity (2^entropy) of the conditional Gaussians.
+
+    verbose : int
+        Verbosity level.
+
+    Returns
+    -------
+    P : ndarray of shape (n_samples, n_samples), dtype=np.float64
+        Probabilities of conditional Gaussian distributions p_i|j.
+    """
+    # Maximum number of binary search steps
+    cdef long n_steps = 100
+
+    cdef long n_samples = sqdistances.shape[0]
+    cdef long n_neighbors = sqdistances.shape[1]
+    cdef int using_neighbors = n_neighbors < n_samples
+    # Precisions of conditional Gaussian distributions
+    cdef double beta
+    cdef double beta_min
+    cdef double beta_max
+    cdef double beta_sum = 0.0
+
+    # Use log scale
+    cdef double desired_entropy = math.log(desired_perplexity)
+    cdef double entropy_diff
+
+    cdef double entropy
+    cdef double sum_Pi
+    cdef double sum_disti_Pi
+    cdef long i, j, l
+
+    # This array is later used as a 32bit array. It has multiple intermediate
+    # floating point additions that benefit from the extra precision
+    cdef float64_t[:, :] P = np.zeros(
+        (n_samples, n_neighbors), dtype=np.float64)
+
+    for i in range(n_samples):
+        beta_min = -INFINITY
+        beta_max = INFINITY
+        beta = 1.0
+
+        # Binary search of precision for i-th conditional distribution
+        for l in range(n_steps):
+            # Compute current entropy and corresponding probabilities
+            # computed just over the nearest neighbors or over all data
+            # if we're not using neighbors
+            sum_Pi = 0.0
+            for j in range(n_neighbors):
+                if j != i or using_neighbors:
+                    P[i, j] = math.exp(-sqdistances[i, j] * beta)
+                    sum_Pi += P[i, j]
+
+            if sum_Pi == 0.0:
+                sum_Pi = EPSILON_DBL
+            sum_disti_Pi = 0.0
+
+            for j in range(n_neighbors):
+                P[i, j] /= sum_Pi
+                sum_disti_Pi += sqdistances[i, j] * P[i, j]
+
+            entropy = math.log(sum_Pi) + beta * sum_disti_Pi
+            entropy_diff = entropy - desired_entropy
+
+            if math.fabs(entropy_diff) <= PERPLEXITY_TOLERANCE:
+                break
+
+            if entropy_diff > 0.0:
+                beta_min = beta
+                if beta_max == INFINITY:
+                    beta *= 2.0
+                else:
+                    beta = (beta + beta_max) / 2.0
+            else:
+                beta_max = beta
+                if beta_min == -INFINITY:
+                    beta /= 2.0
+                else:
+                    beta = (beta + beta_min) / 2.0
+
+        beta_sum += beta
+
+        if verbose and ((i + 1) % 1000 == 0 or i + 1 == n_samples):
+            print("[t-SNE] Computed conditional probabilities for sample "
+                  "%d / %d" % (i + 1, n_samples))
+
+    if verbose:
+        print("[t-SNE] Mean sigma: %f"
+              % np.mean(math.sqrt(n_samples / beta_sum)))
+    return np.asarray(P)
@@ -0,0 +1,16 @@
+py.extension_module(
+  '_utils',
+  ['_utils.pyx', utils_cython_tree],
+  cython_args: cython_args,
+  subdir: 'sklearn/manifold',
+  install: true
+)
+
+py.extension_module(
+  '_barnes_hut_tsne',
+  '_barnes_hut_tsne.pyx',
+  dependencies: [np_dep],
+  cython_args: cython_args,
+  subdir: 'sklearn/manifold',
+  install: true
+)
@@ -0,0 +1,348 @@
+import math
+from itertools import product
+
+import numpy as np
+import pytest
+from scipy.sparse import rand as sparse_rand
+
+from sklearn import clone, datasets, manifold, neighbors, pipeline, preprocessing
+from sklearn.datasets import make_blobs
+from sklearn.metrics.pairwise import pairwise_distances
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_allclose_dense_sparse,
+    assert_array_equal,
+)
+from sklearn.utils.fixes import CSR_CONTAINERS
+
+eigen_solvers = ["auto", "dense", "arpack"]
+path_methods = ["auto", "FW", "D"]
+
+
+def create_sample_data(dtype, n_pts=25, add_noise=False):
+    # grid of equidistant points in 2D, n_components = n_dim
+    n_per_side = int(math.sqrt(n_pts))
+    X = np.array(list(product(range(n_per_side), repeat=2))).astype(dtype, copy=False)
+    if add_noise:
+        # add noise in a third dimension
+        rng = np.random.RandomState(0)
+        noise = 0.1 * rng.randn(n_pts, 1).astype(dtype, copy=False)
+        X = np.concatenate((X, noise), 1)
+    return X
+
+
+@pytest.mark.parametrize("n_neighbors, radius", [(24, None), (None, np.inf)])
+@pytest.mark.parametrize("eigen_solver", eigen_solvers)
+@pytest.mark.parametrize("path_method", path_methods)
+def test_isomap_simple_grid(
+    global_dtype, n_neighbors, radius, eigen_solver, path_method
+):
+    # Isomap should preserve distances when all neighbors are used
+    n_pts = 25
+    X = create_sample_data(global_dtype, n_pts=n_pts, add_noise=False)
+
+    # distances from each point to all others
+    if n_neighbors is not None:
+        G = neighbors.kneighbors_graph(X, n_neighbors, mode="distance")
+    else:
+        G = neighbors.radius_neighbors_graph(X, radius, mode="distance")
+
+    clf = manifold.Isomap(
+        n_neighbors=n_neighbors,
+        radius=radius,
+        n_components=2,
+        eigen_solver=eigen_solver,
+        path_method=path_method,
+    )
+    clf.fit(X)
+
+    if n_neighbors is not None:
+        G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode="distance")
+    else:
+        G_iso = neighbors.radius_neighbors_graph(
+            clf.embedding_, radius, mode="distance"
+        )
+    atol = 1e-5 if global_dtype == np.float32 else 0
+    assert_allclose_dense_sparse(G, G_iso, atol=atol)
+
+
+@pytest.mark.parametrize("n_neighbors, radius", [(24, None), (None, np.inf)])
+@pytest.mark.parametrize("eigen_solver", eigen_solvers)
+@pytest.mark.parametrize("path_method", path_methods)
+def test_isomap_reconstruction_error(
+    global_dtype, n_neighbors, radius, eigen_solver, path_method
+):
+    if global_dtype is np.float32:
+        pytest.skip(
+            "Skipping test due to numerical instabilities on float32 data"
+            "from KernelCenterer used in the reconstruction_error method"
+        )
+
+    # Same setup as in test_isomap_simple_grid, with an added dimension
+    n_pts = 25
+    X = create_sample_data(global_dtype, n_pts=n_pts, add_noise=True)
+
+    # compute input kernel
+    if n_neighbors is not None:
+        G = neighbors.kneighbors_graph(X, n_neighbors, mode="distance").toarray()
+    else:
+        G = neighbors.radius_neighbors_graph(X, radius, mode="distance").toarray()
+    centerer = preprocessing.KernelCenterer()
+    K = centerer.fit_transform(-0.5 * G**2)
+
+    clf = manifold.Isomap(
+        n_neighbors=n_neighbors,
+        radius=radius,
+        n_components=2,
+        eigen_solver=eigen_solver,
+        path_method=path_method,
+    )
+    clf.fit(X)
+
+    # compute output kernel
+    if n_neighbors is not None:
+        G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode="distance")
+    else:
+        G_iso = neighbors.radius_neighbors_graph(
+            clf.embedding_, radius, mode="distance"
+        )
+    G_iso = G_iso.toarray()
+    K_iso = centerer.fit_transform(-0.5 * G_iso**2)
+
+    # make sure error agrees
+    reconstruction_error = np.linalg.norm(K - K_iso) / n_pts
+    atol = 1e-5 if global_dtype == np.float32 else 0
+    assert_allclose(reconstruction_error, clf.reconstruction_error(), atol=atol)
+
+
+@pytest.mark.parametrize("n_neighbors, radius", [(2, None), (None, 0.5)])
+def test_transform(global_dtype, n_neighbors, radius):
+    n_samples = 200
+    n_components = 10
+    noise_scale = 0.01
+
+    # Create S-curve dataset
+    X, y = datasets.make_s_curve(n_samples, random_state=0)
+
+    X = X.astype(global_dtype, copy=False)
+
+    # Compute isomap embedding
+    iso = manifold.Isomap(
+        n_components=n_components, n_neighbors=n_neighbors, radius=radius
+    )
+    X_iso = iso.fit_transform(X)
+
+    # Re-embed a noisy version of the points
+    rng = np.random.RandomState(0)
+    noise = noise_scale * rng.randn(*X.shape)
+    X_iso2 = iso.transform(X + noise)
+
+    # Make sure the rms error on re-embedding is comparable to noise_scale
+    assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale
+
+
+@pytest.mark.parametrize("n_neighbors, radius", [(2, None), (None, 10.0)])
+def test_pipeline(n_neighbors, radius, global_dtype):
+    # check that Isomap works fine as a transformer in a Pipeline
+    # only checks that no error is raised.
+    # TODO check that it actually does something useful
+    X, y = datasets.make_blobs(random_state=0)
+    X = X.astype(global_dtype, copy=False)
+    clf = pipeline.Pipeline(
+        [
+            ("isomap", manifold.Isomap(n_neighbors=n_neighbors, radius=radius)),
+            ("clf", neighbors.KNeighborsClassifier()),
+        ]
+    )
+    clf.fit(X, y)
+    assert 0.9 < clf.score(X, y)
+
+
+def test_pipeline_with_nearest_neighbors_transformer(global_dtype):
+    # Test chaining NearestNeighborsTransformer and Isomap with
+    # neighbors_algorithm='precomputed'
+    algorithm = "auto"
+    n_neighbors = 10
+
+    X, _ = datasets.make_blobs(random_state=0)
+    X2, _ = datasets.make_blobs(random_state=1)
+
+    X = X.astype(global_dtype, copy=False)
+    X2 = X2.astype(global_dtype, copy=False)
+
+    # compare the chained version and the compact version
+    est_chain = pipeline.make_pipeline(
+        neighbors.KNeighborsTransformer(
+            n_neighbors=n_neighbors, algorithm=algorithm, mode="distance"
+        ),
+        manifold.Isomap(n_neighbors=n_neighbors, metric="precomputed"),
+    )
+    est_compact = manifold.Isomap(
+        n_neighbors=n_neighbors, neighbors_algorithm=algorithm
+    )
+
+    Xt_chain = est_chain.fit_transform(X)
+    Xt_compact = est_compact.fit_transform(X)
+    assert_allclose(Xt_chain, Xt_compact)
+
+    Xt_chain = est_chain.transform(X2)
+    Xt_compact = est_compact.transform(X2)
+    assert_allclose(Xt_chain, Xt_compact)
+
+
+@pytest.mark.parametrize(
+    "metric, p, is_euclidean",
+    [
+        ("euclidean", 2, True),
+        ("manhattan", 1, False),
+        ("minkowski", 1, False),
+        ("minkowski", 2, True),
+        (lambda x1, x2: np.sqrt(np.sum(x1**2 + x2**2)), 2, False),
+    ],
+)
+def test_different_metric(global_dtype, metric, p, is_euclidean):
+    # Isomap must work on various metric parameters work correctly
+    # and must default to euclidean.
+    X, _ = datasets.make_blobs(random_state=0)
+    X = X.astype(global_dtype, copy=False)
+
+    reference = manifold.Isomap().fit_transform(X)
+    embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)
+
+    if is_euclidean:
+        assert_allclose(embedding, reference)
+    else:
+        with pytest.raises(AssertionError, match="Not equal to tolerance"):
+            assert_allclose(embedding, reference)
+
+
+def test_isomap_clone_bug():
+    # regression test for bug reported in #6062
+    model = manifold.Isomap()
+    for n_neighbors in [10, 15, 20]:
+        model.set_params(n_neighbors=n_neighbors)
+        model.fit(np.random.rand(50, 2))
+        assert model.nbrs_.n_neighbors == n_neighbors
+
+
+@pytest.mark.parametrize("eigen_solver", eigen_solvers)
+@pytest.mark.parametrize("path_method", path_methods)
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_sparse_input(
+    global_dtype, eigen_solver, path_method, global_random_seed, csr_container
+):
+    # TODO: compare results on dense and sparse data as proposed in:
+    # https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
+    X = csr_container(
+        sparse_rand(
+            100,
+            3,
+            density=0.1,
+            format="csr",
+            dtype=global_dtype,
+            random_state=global_random_seed,
+        )
+    )
+
+    iso_dense = manifold.Isomap(
+        n_components=2,
+        eigen_solver=eigen_solver,
+        path_method=path_method,
+        n_neighbors=8,
+    )
+    iso_sparse = clone(iso_dense)
+
+    X_trans_dense = iso_dense.fit_transform(X.toarray())
+    X_trans_sparse = iso_sparse.fit_transform(X)
+
+    assert_allclose(X_trans_sparse, X_trans_dense, rtol=1e-4, atol=1e-4)
+
+
+def test_isomap_fit_precomputed_radius_graph(global_dtype):
+    # Isomap.fit_transform must yield similar result when using
+    # a precomputed distance matrix.
+
+    X, y = datasets.make_s_curve(200, random_state=0)
+    X = X.astype(global_dtype, copy=False)
+    radius = 10
+
+    g = neighbors.radius_neighbors_graph(X, radius=radius, mode="distance")
+    isomap = manifold.Isomap(n_neighbors=None, radius=radius, metric="precomputed")
+    isomap.fit(g)
+    precomputed_result = isomap.embedding_
+
+    isomap = manifold.Isomap(n_neighbors=None, radius=radius, metric="minkowski")
+    result = isomap.fit_transform(X)
+    atol = 1e-5 if global_dtype == np.float32 else 0
+    assert_allclose(precomputed_result, result, atol=atol)
+
+
+def test_isomap_fitted_attributes_dtype(global_dtype):
+    """Check that the fitted attributes are stored accordingly to the
+    data type of X."""
+    iso = manifold.Isomap(n_neighbors=2)
+
+    X = np.array([[1, 2], [3, 4], [5, 6]], dtype=global_dtype)
+
+    iso.fit(X)
+
+    assert iso.dist_matrix_.dtype == global_dtype
+    assert iso.embedding_.dtype == global_dtype
+
+
+def test_isomap_dtype_equivalence():
+    """Check the equivalence of the results with 32 and 64 bits input."""
+    iso_32 = manifold.Isomap(n_neighbors=2)
+    X_32 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
+    iso_32.fit(X_32)
+
+    iso_64 = manifold.Isomap(n_neighbors=2)
+    X_64 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
+    iso_64.fit(X_64)
+
+    assert_allclose(iso_32.dist_matrix_, iso_64.dist_matrix_)
+
+
+def test_isomap_raise_error_when_neighbor_and_radius_both_set():
+    # Isomap.fit_transform must raise a ValueError if
+    # radius and n_neighbors are provided.
+
+    X, _ = datasets.load_digits(return_X_y=True)
+    isomap = manifold.Isomap(n_neighbors=3, radius=5.5)
+    msg = "Both n_neighbors and radius are provided"
+    with pytest.raises(ValueError, match=msg):
+        isomap.fit_transform(X)
+
+
+def test_multiple_connected_components():
+    # Test that a warning is raised when the graph has multiple components
+    X = np.array([0, 1, 2, 5, 6, 7])[:, None]
+    with pytest.warns(UserWarning, match="number of connected components"):
+        manifold.Isomap(n_neighbors=2).fit(X)
+
+
+def test_multiple_connected_components_metric_precomputed(global_dtype):
+    # Test that an error is raised when the graph has multiple components
+    # and when X is a precomputed neighbors graph.
+    X = np.array([0, 1, 2, 5, 6, 7])[:, None].astype(global_dtype, copy=False)
+
+    # works with a precomputed distance matrix (dense)
+    X_distances = pairwise_distances(X)
+    with pytest.warns(UserWarning, match="number of connected components"):
+        manifold.Isomap(n_neighbors=1, metric="precomputed").fit(X_distances)
+
+    # does not work with a precomputed neighbors graph (sparse)
+    X_graph = neighbors.kneighbors_graph(X, n_neighbors=2, mode="distance")
+    with pytest.raises(RuntimeError, match="number of connected components"):
+        manifold.Isomap(n_neighbors=1, metric="precomputed").fit(X_graph)
+
+
+def test_get_feature_names_out():
+    """Check get_feature_names_out for Isomap."""
+    X, y = make_blobs(random_state=0, n_features=4)
+    n_components = 2
+
+    iso = manifold.Isomap(n_components=n_components)
+    iso.fit_transform(X)
+    names = iso.get_feature_names_out()
+    assert_array_equal([f"isomap{i}" for i in range(n_components)], names)
@@ -0,0 +1,171 @@
+from itertools import product
+
+import numpy as np
+import pytest
+from scipy import linalg
+
+from sklearn import manifold, neighbors
+from sklearn.datasets import make_blobs
+from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_array_equal,
+    ignore_warnings,
+)
+
+eigen_solvers = ["dense", "arpack"]
+
+
+# ----------------------------------------------------------------------
+# Test utility routines
+def test_barycenter_kneighbors_graph(global_dtype):
+    X = np.array([[0, 1], [1.01, 1.0], [2, 0]], dtype=global_dtype)
+
+    graph = barycenter_kneighbors_graph(X, 1)
+    expected_graph = np.array(
+        [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=global_dtype
+    )
+
+    assert graph.dtype == global_dtype
+
+    assert_allclose(graph.toarray(), expected_graph)
+
+    graph = barycenter_kneighbors_graph(X, 2)
+    # check that columns sum to one
+    assert_allclose(np.sum(graph.toarray(), axis=1), np.ones(3))
+    pred = np.dot(graph.toarray(), X)
+    assert linalg.norm(pred - X) / X.shape[0] < 1
+
+
+# ----------------------------------------------------------------------
+# Test LLE by computing the reconstruction error on some manifolds.
+
+
+def test_lle_simple_grid(global_dtype):
+    # note: ARPACK is numerically unstable, so this test will fail for
+    #       some random seeds.  We choose 42 because the tests pass.
+    #       for arm64 platforms 2 makes the test fail.
+    # TODO: rewrite this test to make less sensitive to the random seed,
+    # irrespective of the platform.
+    rng = np.random.RandomState(42)
+
+    # grid of equidistant points in 2D, n_components = n_dim
+    X = np.array(list(product(range(5), repeat=2)))
+    X = X + 1e-10 * rng.uniform(size=X.shape)
+    X = X.astype(global_dtype, copy=False)
+
+    n_components = 2
+    clf = manifold.LocallyLinearEmbedding(
+        n_neighbors=5, n_components=n_components, random_state=rng
+    )
+    tol = 0.1
+
+    N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
+    reconstruction_error = linalg.norm(np.dot(N, X) - X, "fro")
+    assert reconstruction_error < tol
+
+    for solver in eigen_solvers:
+        clf.set_params(eigen_solver=solver)
+        clf.fit(X)
+        assert clf.embedding_.shape[1] == n_components
+        reconstruction_error = (
+            linalg.norm(np.dot(N, clf.embedding_) - clf.embedding_, "fro") ** 2
+        )
+
+        assert reconstruction_error < tol
+        assert_allclose(clf.reconstruction_error_, reconstruction_error, atol=1e-1)
+
+    # re-embed a noisy version of X using the transform method
+    noise = rng.randn(*X.shape).astype(global_dtype, copy=False) / 100
+    X_reembedded = clf.transform(X + noise)
+    assert linalg.norm(X_reembedded - clf.embedding_) < tol
+
+
+@pytest.mark.parametrize("method", ["standard", "hessian", "modified", "ltsa"])
+@pytest.mark.parametrize("solver", eigen_solvers)
+def test_lle_manifold(global_dtype, method, solver):
+    rng = np.random.RandomState(0)
+    # similar test on a slightly more complex manifold
+    X = np.array(list(product(np.arange(18), repeat=2)))
+    X = np.c_[X, X[:, 0] ** 2 / 18]
+    X = X + 1e-10 * rng.uniform(size=X.shape)
+    X = X.astype(global_dtype, copy=False)
+    n_components = 2
+
+    clf = manifold.LocallyLinearEmbedding(
+        n_neighbors=6, n_components=n_components, method=method, random_state=0
+    )
+    tol = 1.5 if method == "standard" else 3
+
+    N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
+    reconstruction_error = linalg.norm(np.dot(N, X) - X)
+    assert reconstruction_error < tol
+
+    clf.set_params(eigen_solver=solver)
+    clf.fit(X)
+    assert clf.embedding_.shape[1] == n_components
+    reconstruction_error = (
+        linalg.norm(np.dot(N, clf.embedding_) - clf.embedding_, "fro") ** 2
+    )
+    details = "solver: %s, method: %s" % (solver, method)
+    assert reconstruction_error < tol, details
+    assert (
+        np.abs(clf.reconstruction_error_ - reconstruction_error)
+        < tol * reconstruction_error
+    ), details
+
+
+def test_pipeline():
+    # check that LocallyLinearEmbedding works fine as a Pipeline
+    # only checks that no error is raised.
+    # TODO check that it actually does something useful
+    from sklearn import datasets, pipeline
+
+    X, y = datasets.make_blobs(random_state=0)
+    clf = pipeline.Pipeline(
+        [
+            ("filter", manifold.LocallyLinearEmbedding(random_state=0)),
+            ("clf", neighbors.KNeighborsClassifier()),
+        ]
+    )
+    clf.fit(X, y)
+    assert 0.9 < clf.score(X, y)
+
+
+# Test the error raised when the weight matrix is singular
+def test_singular_matrix():
+    M = np.ones((200, 3))
+    f = ignore_warnings
+    with pytest.raises(ValueError, match="Error in determining null-space with ARPACK"):
+        f(
+            manifold.locally_linear_embedding(
+                M,
+                n_neighbors=2,
+                n_components=1,
+                method="standard",
+                eigen_solver="arpack",
+            )
+        )
+
+
+# regression test for #6033
+def test_integer_input():
+    rand = np.random.RandomState(0)
+    X = rand.randint(0, 100, size=(20, 3))
+
+    for method in ["standard", "hessian", "modified", "ltsa"]:
+        clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10)
+        clf.fit(X)  # this previously raised a TypeError
+
+
+def test_get_feature_names_out():
+    """Check get_feature_names_out for LocallyLinearEmbedding."""
+    X, y = make_blobs(random_state=0, n_features=4)
+    n_components = 2
+
+    iso = manifold.LocallyLinearEmbedding(n_components=n_components)
+    iso.fit(X)
+    names = iso.get_feature_names_out()
+    assert_array_equal(
+        [f"locallylinearembedding{i}" for i in range(n_components)], names
+    )
@@ -0,0 +1,87 @@
+from unittest.mock import Mock
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose, assert_array_almost_equal
+
+from sklearn.manifold import _mds as mds
+from sklearn.metrics import euclidean_distances
+
+
+def test_smacof():
+    # test metric smacof using the data of "Modern Multidimensional Scaling",
+    # Borg & Groenen, p 154
+    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+    Z = np.array([[-0.266, -0.539], [0.451, 0.252], [0.016, -0.238], [-0.200, 0.524]])
+    X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1, n_init=1)
+    X_true = np.array(
+        [[-1.415, -2.471], [1.633, 1.107], [0.249, -0.067], [-0.468, 1.431]]
+    )
+    assert_array_almost_equal(X, X_true, decimal=3)
+
+
+def test_smacof_error():
+    # Not symmetric similarity matrix:
+    sim = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+
+    with pytest.raises(ValueError):
+        mds.smacof(sim)
+
+    # Not squared similarity matrix:
+    sim = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [4, 2, 1, 0]])
+
+    with pytest.raises(ValueError):
+        mds.smacof(sim)
+
+    # init not None and not correct format:
+    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+
+    Z = np.array([[-0.266, -0.539], [0.016, -0.238], [-0.200, 0.524]])
+    with pytest.raises(ValueError):
+        mds.smacof(sim, init=Z, n_init=1)
+
+
+def test_MDS():
+    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+    mds_clf = mds.MDS(metric=False, n_jobs=3, dissimilarity="precomputed")
+    mds_clf.fit(sim)
+
+
+@pytest.mark.parametrize("k", [0.5, 1.5, 2])
+def test_normed_stress(k):
+    """Test that non-metric MDS normalized stress is scale-invariant."""
+    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+
+    X1, stress1 = mds.smacof(sim, metric=False, max_iter=5, random_state=0)
+    X2, stress2 = mds.smacof(k * sim, metric=False, max_iter=5, random_state=0)
+
+    assert_allclose(stress1, stress2, rtol=1e-5)
+    assert_allclose(X1, X2, rtol=1e-5)
+
+
+def test_normalize_metric_warning():
+    """
+    Test that a UserWarning is emitted when using normalized stress with
+    metric-MDS.
+    """
+    msg = "Normalized stress is not supported"
+    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
+    with pytest.raises(ValueError, match=msg):
+        mds.smacof(sim, metric=True, normalized_stress=True)
+
+
+@pytest.mark.parametrize("metric", [True, False])
+def test_normalized_stress_auto(metric, monkeypatch):
+    rng = np.random.RandomState(0)
+    X = rng.randn(4, 3)
+    dist = euclidean_distances(X)
+
+    mock = Mock(side_effect=mds._smacof_single)
+    monkeypatch.setattr("sklearn.manifold._mds._smacof_single", mock)
+
+    est = mds.MDS(metric=metric, normalized_stress="auto", random_state=rng)
+    est.fit_transform(X)
+    assert mock.call_args[1]["normalized_stress"] != metric
+
+    mds.smacof(dist, metric=metric, normalized_stress="auto", random_state=rng)
+    assert mock.call_args[1]["normalized_stress"] != metric
@@ -0,0 +1,541 @@
+from unittest.mock import Mock
+
+import numpy as np
+import pytest
+from scipy import sparse
+from scipy.linalg import eigh
+from scipy.sparse.linalg import eigsh, lobpcg
+
+from sklearn.cluster import KMeans
+from sklearn.datasets import make_blobs
+from sklearn.manifold import SpectralEmbedding, _spectral_embedding, spectral_embedding
+from sklearn.manifold._spectral_embedding import (
+    _graph_connected_component,
+    _graph_is_connected,
+)
+from sklearn.metrics import normalized_mutual_info_score, pairwise_distances
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
+from sklearn.utils.extmath import _deterministic_vector_sign_flip
+from sklearn.utils.fixes import (
+    COO_CONTAINERS,
+    CSC_CONTAINERS,
+    CSR_CONTAINERS,
+    parse_version,
+    sp_version,
+)
+from sklearn.utils.fixes import laplacian as csgraph_laplacian
+
+try:
+    from pyamg import smoothed_aggregation_solver  # noqa
+
+    pyamg_available = True
+except ImportError:
+    pyamg_available = False
+skip_if_no_pyamg = pytest.mark.skipif(
+    not pyamg_available, reason="PyAMG is required for the tests in this function."
+)
+
+# non centered, sparse centers to check the
+centers = np.array(
+    [
+        [0.0, 5.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 4.0, 0.0, 0.0],
+        [1.0, 0.0, 0.0, 5.0, 1.0],
+    ]
+)
+n_samples = 1000
+n_clusters, n_features = centers.shape
+S, true_labels = make_blobs(
+    n_samples=n_samples, centers=centers, cluster_std=1.0, random_state=42
+)
+
+
+def _assert_equal_with_sign_flipping(A, B, tol=0.0):
+    """Check array A and B are equal with possible sign flipping on
+    each columns"""
+    tol_squared = tol**2
+    for A_col, B_col in zip(A.T, B.T):
+        assert (
+            np.max((A_col - B_col) ** 2) <= tol_squared
+            or np.max((A_col + B_col) ** 2) <= tol_squared
+        )
+
+
+@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
+def test_sparse_graph_connected_component(coo_container):
+    rng = np.random.RandomState(42)
+    n_samples = 300
+    boundaries = [0, 42, 121, 200, n_samples]
+    p = rng.permutation(n_samples)
+    connections = []
+
+    for start, stop in zip(boundaries[:-1], boundaries[1:]):
+        group = p[start:stop]
+        # Connect all elements within the group at least once via an
+        # arbitrary path that spans the group.
+        for i in range(len(group) - 1):
+            connections.append((group[i], group[i + 1]))
+
+        # Add some more random connections within the group
+        min_idx, max_idx = 0, len(group) - 1
+        n_random_connections = 1000
+        source = rng.randint(min_idx, max_idx, size=n_random_connections)
+        target = rng.randint(min_idx, max_idx, size=n_random_connections)
+        connections.extend(zip(group[source], group[target]))
+
+    # Build a symmetric affinity matrix
+    row_idx, column_idx = tuple(np.array(connections).T)
+    data = rng.uniform(0.1, 42, size=len(connections))
+    affinity = coo_container((data, (row_idx, column_idx)))
+    affinity = 0.5 * (affinity + affinity.T)
+
+    for start, stop in zip(boundaries[:-1], boundaries[1:]):
+        component_1 = _graph_connected_component(affinity, p[start])
+        component_size = stop - start
+        assert component_1.sum() == component_size
+
+        # We should retrieve the same component mask by starting by both ends
+        # of the group
+        component_2 = _graph_connected_component(affinity, p[stop - 1])
+        assert component_2.sum() == component_size
+        assert_array_equal(component_1, component_2)
+
+
+# TODO: investigate why this test is seed-sensitive on 32-bit Python
+# runtimes. Is this revealing a numerical stability problem ? Or is it
+# expected from the test numerical design ? In the latter case the test
+# should be made less seed-sensitive instead.
+@pytest.mark.parametrize(
+    "eigen_solver",
+    [
+        "arpack",
+        "lobpcg",
+        pytest.param("amg", marks=skip_if_no_pyamg),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_spectral_embedding_two_components(eigen_solver, dtype, seed=0):
+    # Test spectral embedding with two components
+    random_state = np.random.RandomState(seed)
+    n_sample = 100
+    affinity = np.zeros(shape=[n_sample * 2, n_sample * 2])
+    # first component
+    affinity[0:n_sample, 0:n_sample] = (
+        np.abs(random_state.randn(n_sample, n_sample)) + 2
+    )
+    # second component
+    affinity[n_sample::, n_sample::] = (
+        np.abs(random_state.randn(n_sample, n_sample)) + 2
+    )
+
+    # Test of internal _graph_connected_component before connection
+    component = _graph_connected_component(affinity, 0)
+    assert component[:n_sample].all()
+    assert not component[n_sample:].any()
+    component = _graph_connected_component(affinity, -1)
+    assert not component[:n_sample].any()
+    assert component[n_sample:].all()
+
+    # connection
+    affinity[0, n_sample + 1] = 1
+    affinity[n_sample + 1, 0] = 1
+    affinity.flat[:: 2 * n_sample + 1] = 0
+    affinity = 0.5 * (affinity + affinity.T)
+
+    true_label = np.zeros(shape=2 * n_sample)
+    true_label[0:n_sample] = 1
+
+    se_precomp = SpectralEmbedding(
+        n_components=1,
+        affinity="precomputed",
+        random_state=np.random.RandomState(seed),
+        eigen_solver=eigen_solver,
+    )
+
+    embedded_coordinate = se_precomp.fit_transform(affinity.astype(dtype))
+    # thresholding on the first components using 0.
+    label_ = np.array(embedded_coordinate.ravel() < 0, dtype=np.int64)
+    assert normalized_mutual_info_score(true_label, label_) == pytest.approx(1.0)
+
+
+@pytest.mark.parametrize("sparse_container", [None, *CSR_CONTAINERS])
+@pytest.mark.parametrize(
+    "eigen_solver",
+    [
+        "arpack",
+        "lobpcg",
+        pytest.param("amg", marks=skip_if_no_pyamg),
+    ],
+)
+@pytest.mark.parametrize("dtype", (np.float32, np.float64))
+def test_spectral_embedding_precomputed_affinity(
+    sparse_container, eigen_solver, dtype, seed=36
+):
+    # Test spectral embedding with precomputed kernel
+    gamma = 1.0
+    X = S if sparse_container is None else sparse_container(S)
+
+    se_precomp = SpectralEmbedding(
+        n_components=2,
+        affinity="precomputed",
+        random_state=np.random.RandomState(seed),
+        eigen_solver=eigen_solver,
+    )
+    se_rbf = SpectralEmbedding(
+        n_components=2,
+        affinity="rbf",
+        gamma=gamma,
+        random_state=np.random.RandomState(seed),
+        eigen_solver=eigen_solver,
+    )
+    embed_precomp = se_precomp.fit_transform(rbf_kernel(X.astype(dtype), gamma=gamma))
+    embed_rbf = se_rbf.fit_transform(X.astype(dtype))
+    assert_array_almost_equal(se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
+    _assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)
+
+
+def test_precomputed_nearest_neighbors_filtering():
+    # Test precomputed graph filtering when containing too many neighbors
+    n_neighbors = 2
+    results = []
+    for additional_neighbors in [0, 10]:
+        nn = NearestNeighbors(n_neighbors=n_neighbors + additional_neighbors).fit(S)
+        graph = nn.kneighbors_graph(S, mode="connectivity")
+        embedding = (
+            SpectralEmbedding(
+                random_state=0,
+                n_components=2,
+                affinity="precomputed_nearest_neighbors",
+                n_neighbors=n_neighbors,
+            )
+            .fit(graph)
+            .embedding_
+        )
+        results.append(embedding)
+
+    assert_array_equal(results[0], results[1])
+
+
+@pytest.mark.parametrize("sparse_container", [None, *CSR_CONTAINERS])
+def test_spectral_embedding_callable_affinity(sparse_container, seed=36):
+    # Test spectral embedding with callable affinity
+    gamma = 0.9
+    kern = rbf_kernel(S, gamma=gamma)
+    X = S if sparse_container is None else sparse_container(S)
+
+    se_callable = SpectralEmbedding(
+        n_components=2,
+        affinity=(lambda x: rbf_kernel(x, gamma=gamma)),
+        gamma=gamma,
+        random_state=np.random.RandomState(seed),
+    )
+    se_rbf = SpectralEmbedding(
+        n_components=2,
+        affinity="rbf",
+        gamma=gamma,
+        random_state=np.random.RandomState(seed),
+    )
+    embed_rbf = se_rbf.fit_transform(X)
+    embed_callable = se_callable.fit_transform(X)
+    assert_array_almost_equal(se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
+    assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
+    _assert_equal_with_sign_flipping(embed_rbf, embed_callable, 0.05)
+
+
+# TODO: Remove when pyamg does replaces sp.rand call with np.random.rand
+# https://github.com/scikit-learn/scikit-learn/issues/15913
+@pytest.mark.filterwarnings(
+    "ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*"
+)
+# TODO: Remove when pyamg removes the use of np.float
+@pytest.mark.filterwarnings(
+    "ignore:`np.float` is a deprecated alias:DeprecationWarning:pyamg.*"
+)
+# TODO: Remove when pyamg removes the use of pinv2
+@pytest.mark.filterwarnings(
+    "ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*"
+)
+@pytest.mark.filterwarnings(
+    "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*"
+)
+@pytest.mark.skipif(
+    not pyamg_available, reason="PyAMG is required for the tests in this function."
+)
+@pytest.mark.parametrize("dtype", (np.float32, np.float64))
+@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
+def test_spectral_embedding_amg_solver(dtype, coo_container, seed=36):
+    se_amg = SpectralEmbedding(
+        n_components=2,
+        affinity="nearest_neighbors",
+        eigen_solver="amg",
+        n_neighbors=5,
+        random_state=np.random.RandomState(seed),
+    )
+    se_arpack = SpectralEmbedding(
+        n_components=2,
+        affinity="nearest_neighbors",
+        eigen_solver="arpack",
+        n_neighbors=5,
+        random_state=np.random.RandomState(seed),
+    )
+    embed_amg = se_amg.fit_transform(S.astype(dtype))
+    embed_arpack = se_arpack.fit_transform(S.astype(dtype))
+    _assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
+
+    # same with special case in which amg is not actually used
+    # regression test for #10715
+    # affinity between nodes
+    row = np.array([0, 0, 1, 2, 3, 3, 4], dtype=np.int32)
+    col = np.array([1, 2, 2, 3, 4, 5, 5], dtype=np.int32)
+    val = np.array([100, 100, 100, 1, 100, 100, 100], dtype=np.int64)
+
+    affinity = coo_container(
+        (np.hstack([val, val]), (np.hstack([row, col]), np.hstack([col, row]))),
+        shape=(6, 6),
+    )
+    se_amg.affinity = "precomputed"
+    se_arpack.affinity = "precomputed"
+    embed_amg = se_amg.fit_transform(affinity.astype(dtype))
+    embed_arpack = se_arpack.fit_transform(affinity.astype(dtype))
+    _assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
+
+    # Check that passing a sparse matrix with `np.int64` indices dtype raises an error
+    # or is successful based on the version of SciPy which is installed.
+    # Use a CSR matrix to avoid any conversion during the validation
+    affinity = affinity.tocsr()
+    affinity.indptr = affinity.indptr.astype(np.int64)
+    affinity.indices = affinity.indices.astype(np.int64)
+
+    # PR: https://github.com/scipy/scipy/pull/18913
+    # First integration in 1.11.3: https://github.com/scipy/scipy/pull/19279
+    scipy_graph_traversal_supports_int64_index = sp_version >= parse_version("1.11.3")
+    if scipy_graph_traversal_supports_int64_index:
+        se_amg.fit_transform(affinity)
+    else:
+        err_msg = "Only sparse matrices with 32-bit integer indices are accepted"
+        with pytest.raises(ValueError, match=err_msg):
+            se_amg.fit_transform(affinity)
+
+
+# TODO: Remove filterwarnings when pyamg does replaces sp.rand call with
+# np.random.rand:
+# https://github.com/scikit-learn/scikit-learn/issues/15913
+@pytest.mark.filterwarnings(
+    "ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*"
+)
+# TODO: Remove when pyamg removes the use of np.float
+@pytest.mark.filterwarnings(
+    "ignore:`np.float` is a deprecated alias:DeprecationWarning:pyamg.*"
+)
+# TODO: Remove when pyamg removes the use of pinv2
+@pytest.mark.filterwarnings(
+    "ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*"
+)
+@pytest.mark.skipif(
+    not pyamg_available, reason="PyAMG is required for the tests in this function."
+)
+# TODO: Remove when pyamg removes the use of np.find_common_type
+@pytest.mark.filterwarnings(
+    "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*"
+)
+@pytest.mark.parametrize("dtype", (np.float32, np.float64))
+def test_spectral_embedding_amg_solver_failure(dtype, seed=36):
+    # Non-regression test for amg solver failure (issue #13393 on github)
+    num_nodes = 100
+    X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
+    X = X.astype(dtype)
+    upper = sparse.triu(X) - sparse.diags(X.diagonal())
+    sym_matrix = upper + upper.T
+    embedding = spectral_embedding(
+        sym_matrix, n_components=10, eigen_solver="amg", random_state=0
+    )
+
+    # Check that the learned embedding is stable w.r.t. random solver init:
+    for i in range(3):
+        new_embedding = spectral_embedding(
+            sym_matrix, n_components=10, eigen_solver="amg", random_state=i + 1
+        )
+        _assert_equal_with_sign_flipping(embedding, new_embedding, tol=0.05)
+
+
+@pytest.mark.filterwarnings("ignore:the behavior of nmi will change in version 0.22")
+def test_pipeline_spectral_clustering(seed=36):
+    # Test using pipeline to do spectral clustering
+    random_state = np.random.RandomState(seed)
+    se_rbf = SpectralEmbedding(
+        n_components=n_clusters, affinity="rbf", random_state=random_state
+    )
+    se_knn = SpectralEmbedding(
+        n_components=n_clusters,
+        affinity="nearest_neighbors",
+        n_neighbors=5,
+        random_state=random_state,
+    )
+    for se in [se_rbf, se_knn]:
+        km = KMeans(n_clusters=n_clusters, random_state=random_state, n_init=10)
+        km.fit(se.fit_transform(S))
+        assert_array_almost_equal(
+            normalized_mutual_info_score(km.labels_, true_labels), 1.0, 2
+        )
+
+
+def test_connectivity(seed=36):
+    # Test that graph connectivity test works as expected
+    graph = np.array(
+        [
+            [1, 0, 0, 0, 0],
+            [0, 1, 1, 0, 0],
+            [0, 1, 1, 1, 0],
+            [0, 0, 1, 1, 1],
+            [0, 0, 0, 1, 1],
+        ]
+    )
+    assert not _graph_is_connected(graph)
+    for csr_container in CSR_CONTAINERS:
+        assert not _graph_is_connected(csr_container(graph))
+    for csc_container in CSC_CONTAINERS:
+        assert not _graph_is_connected(csc_container(graph))
+
+    graph = np.array(
+        [
+            [1, 1, 0, 0, 0],
+            [1, 1, 1, 0, 0],
+            [0, 1, 1, 1, 0],
+            [0, 0, 1, 1, 1],
+            [0, 0, 0, 1, 1],
+        ]
+    )
+    assert _graph_is_connected(graph)
+    for csr_container in CSR_CONTAINERS:
+        assert _graph_is_connected(csr_container(graph))
+    for csc_container in CSC_CONTAINERS:
+        assert _graph_is_connected(csc_container(graph))
+
+
+def test_spectral_embedding_deterministic():
+    # Test that Spectral Embedding is deterministic
+    random_state = np.random.RandomState(36)
+    data = random_state.randn(10, 30)
+    sims = rbf_kernel(data)
+    embedding_1 = spectral_embedding(sims)
+    embedding_2 = spectral_embedding(sims)
+    assert_array_almost_equal(embedding_1, embedding_2)
+
+
+def test_spectral_embedding_unnormalized():
+    # Test that spectral_embedding is also processing unnormalized laplacian
+    # correctly
+    random_state = np.random.RandomState(36)
+    data = random_state.randn(10, 30)
+    sims = rbf_kernel(data)
+    n_components = 8
+    embedding_1 = spectral_embedding(
+        sims, norm_laplacian=False, n_components=n_components, drop_first=False
+    )
+
+    # Verify using manual computation with dense eigh
+    laplacian, dd = csgraph_laplacian(sims, normed=False, return_diag=True)
+    _, diffusion_map = eigh(laplacian)
+    embedding_2 = diffusion_map.T[:n_components]
+    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T
+
+    assert_array_almost_equal(embedding_1, embedding_2)
+
+
+def test_spectral_embedding_first_eigen_vector():
+    # Test that the first eigenvector of spectral_embedding
+    # is constant and that the second is not (for a connected graph)
+    random_state = np.random.RandomState(36)
+    data = random_state.randn(10, 30)
+    sims = rbf_kernel(data)
+    n_components = 2
+
+    for seed in range(10):
+        embedding = spectral_embedding(
+            sims,
+            norm_laplacian=False,
+            n_components=n_components,
+            drop_first=False,
+            random_state=seed,
+        )
+
+        assert np.std(embedding[:, 0]) == pytest.approx(0)
+        assert np.std(embedding[:, 1]) > 1e-3
+
+
+@pytest.mark.parametrize(
+    "eigen_solver",
+    [
+        "arpack",
+        "lobpcg",
+        pytest.param("amg", marks=skip_if_no_pyamg),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_spectral_embedding_preserves_dtype(eigen_solver, dtype):
+    """Check that `SpectralEmbedding is preserving the dtype of the fitted
+    attribute and transformed data.
+
+    Ideally, this test should be covered by the common test
+    `check_transformer_preserve_dtypes`. However, this test only run
+    with transformers implementing `transform` while `SpectralEmbedding`
+    implements only `fit_transform`.
+    """
+    X = S.astype(dtype)
+    se = SpectralEmbedding(
+        n_components=2, affinity="rbf", eigen_solver=eigen_solver, random_state=0
+    )
+    X_trans = se.fit_transform(X)
+
+    assert X_trans.dtype == dtype
+    assert se.embedding_.dtype == dtype
+    assert se.affinity_matrix_.dtype == dtype
+
+
+@pytest.mark.skipif(
+    pyamg_available,
+    reason="PyAMG is installed and we should not test for an error.",
+)
+def test_error_pyamg_not_available():
+    se_precomp = SpectralEmbedding(
+        n_components=2,
+        affinity="rbf",
+        eigen_solver="amg",
+    )
+    err_msg = "The eigen_solver was set to 'amg', but pyamg is not available."
+    with pytest.raises(ValueError, match=err_msg):
+        se_precomp.fit_transform(S)
+
+
+# TODO: Remove when pyamg removes the use of np.find_common_type
+@pytest.mark.filterwarnings(
+    "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*"
+)
+@pytest.mark.parametrize("solver", ["arpack", "amg", "lobpcg"])
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_spectral_eigen_tol_auto(monkeypatch, solver, csr_container):
+    """Test that `eigen_tol="auto"` is resolved correctly"""
+    if solver == "amg" and not pyamg_available:
+        pytest.skip("PyAMG is not available.")
+    X, _ = make_blobs(
+        n_samples=200, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01
+    )
+    D = pairwise_distances(X)  # Distance matrix
+    S = np.max(D) - D  # Similarity matrix
+
+    solver_func = eigsh if solver == "arpack" else lobpcg
+    default_value = 0 if solver == "arpack" else None
+    if solver == "amg":
+        S = csr_container(S)
+
+    mocked_solver = Mock(side_effect=solver_func)
+
+    monkeypatch.setattr(_spectral_embedding, solver_func.__qualname__, mocked_solver)
+
+    spectral_embedding(S, random_state=42, eigen_solver=solver, eigen_tol="auto")
+    mocked_solver.assert_called()
+
+    _, kwargs = mocked_solver.call_args
+    assert kwargs["tol"] == default_value