feat: initial commit - Phase 1 & 2 core features

Author: hiderfong
Date: 2026-04-22 17:07:33 +08:00
Commit: 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,25 @@
"""
The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
"""
# See http://scikit-learn.sourceforge.net/modules/svm.html for complete
# documentation.
# Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
# the scikit-learn community. LibSVM and LibLinear are copyright
# of their respective owners.
# License: BSD 3 clause (C) INRIA 2010
from ._bounds import l1_min_c
from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM
__all__ = [
"LinearSVC",
"LinearSVR",
"NuSVC",
"NuSVR",
"OneClassSVM",
"SVC",
"SVR",
"l1_min_c",
]
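For orientation only (a sketch, not part of the file above): the names re-exported by this __init__.py form the public surface of sklearn.svm. The toy data below is an illustrative assumption.

# Illustrative usage of the re-exported estimators (assumed toy data).
import numpy as np
from sklearn.svm import SVC

X = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0], [1.0, 0.0]])
y = np.array([0, 1, 1, 0])
clf = SVC(kernel="rbf", C=1.0).fit(X, y)
print(clf.predict([[0.9, 0.8]]))  # predicted class for a new point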
File diff suppressed because it is too large.
@@ -0,0 +1,95 @@
"""Determination of parameter bounds"""
# Author: Paolo Losi
# License: BSD 3 clause
from numbers import Real
import numpy as np
from ..preprocessing import LabelBinarizer
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_array, check_consistent_length
@validate_params(
{
"X": ["array-like", "sparse matrix"],
"y": ["array-like"],
"loss": [StrOptions({"squared_hinge", "log"})],
"fit_intercept": ["boolean"],
"intercept_scaling": [Interval(Real, 0, None, closed="neither")],
},
prefer_skip_nested_validation=True,
)
def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
"""Return the lowest bound for C.
The lower bound for C is computed such that for C in (l1_min_C, infinity)
the model is guaranteed not to be empty. This applies to l1 penalized
classifiers, such as LinearSVC with penalty='l1' and
linear_model.LogisticRegression with penalty='l1'.
This value is valid if the class_weight parameter in fit() is not set.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the number of features.
y : array-like of shape (n_samples,)
Target vector relative to X.
loss : {'squared_hinge', 'log'}, default='squared_hinge'
Specifies the loss function.
With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
With 'log' it is the loss of logistic regression models.
fit_intercept : bool, default=True
Specifies if the intercept should be fitted by the model.
It must match the fit() method parameter.
intercept_scaling : float, default=1.0
When fit_intercept is True, instance vector x becomes
[x, intercept_scaling],
i.e. a "synthetic" feature with constant value equal to
intercept_scaling is appended to the instance vector.
It must match the fit() method parameter.
Returns
-------
l1_min_c : float
Minimum value for C.
Examples
--------
>>> from sklearn.svm import l1_min_c
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=20, random_state=42)
>>> print(f"{l1_min_c(X, y, loss='squared_hinge', fit_intercept=True):.4f}")
0.0044
"""
X = check_array(X, accept_sparse="csc")
check_consistent_length(X, y)
Y = LabelBinarizer(neg_label=-1).fit_transform(y).T
# maximum absolute value over classes and features
den = np.max(np.abs(safe_sparse_dot(Y, X)))
if fit_intercept:
bias = np.full(
(np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype
)
den = max(den, abs(np.dot(Y, bias)).max())
if den == 0.0:
raise ValueError(
"Ill-posed l1_min_c calculation: l1 will always "
"select zero coefficients for this data"
)
if loss == "squared_hinge":
return 0.5 / den
else: # loss == 'log':
return 2.0 / den
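A common use of l1_min_c, sketched here for orientation (not part of the file): seed a logarithmic grid of C values for an l1-regularized path. The grid size is an arbitrary choice.

# Hedged sketch: C values below l1_min_c give all-zero coefficients.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.svm import l1_min_c

X, y = make_classification(n_samples=100, n_features=20, random_state=42)
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 4, 8)
for C in cs:
    clf = LogisticRegression(penalty="l1", solver="liblinear", C=C).fit(X, y)
    print(f"C={C:.4f}: {np.count_nonzero(clf.coef_)} nonzero coefficients")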
File diff suppressed because it is too large.
@@ -0,0 +1,43 @@
from ..utils._typedefs cimport intp_t
cdef extern from "_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
ctypedef void (*axpy_func)(int, double, const double*, int, double*, int)
ctypedef void (*scal_func)(int, double, const double*, int)
ctypedef double (*nrm2_func)(int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
axpy_func axpy
scal_func scal
nrm2_func nrm2
cdef extern from "linear.h":
cdef struct feature_node
cdef struct problem
cdef struct model
cdef struct parameter
ctypedef problem* problem_const_ptr "problem const *"
ctypedef parameter* parameter_const_ptr "parameter const *"
ctypedef char* char_const_ptr "char const *"
char_const_ptr check_parameter(problem_const_ptr prob, parameter_const_ptr param)
model *train(problem_const_ptr prob, parameter_const_ptr param, BlasFunctions *blas_functions) nogil
int get_nr_feature (model *model)
int get_nr_class (model *model)
void get_n_iter (model *model, int *n_iter)
void free_and_destroy_model (model **)
void destroy_param (parameter *)
cdef extern from "liblinear_helper.c":
void copy_w(void *, model *, int)
parameter *set_parameter(int, double, double, int, char *, char *, int, int, double)
problem *set_problem (char *, int, int, int, int, double, char *, char *)
problem *csr_set_problem (char *, int, char *, char *, int, int, int, double, char *, char *)
model *set_model(parameter *, char *, intp_t *, char *, double)
double get_bias(model *)
void free_problem (problem *)
void free_parameter (parameter *)
void set_verbosity(int)
@@ -0,0 +1,147 @@
"""
Wrapper for liblinear
Author: fabian.pedregosa@inria.fr
"""
import numpy as np
from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2
from ..utils._typedefs cimport float32_t, float64_t, int32_t
include "_liblinear.pxi"
def train_wrap(
object X,
const float64_t[::1] Y,
bint is_sparse,
int solver_type,
double eps,
double bias,
double C,
const float64_t[:] class_weight,
int max_iter,
unsigned random_seed,
double epsilon,
const float64_t[::1] sample_weight
):
cdef parameter *param
cdef problem *problem
cdef model *model
cdef char_const_ptr error_msg
cdef int len_w
cdef bint X_has_type_float64 = X.dtype == np.float64
cdef char * X_data_bytes_ptr
cdef const float64_t[::1] X_data_64
cdef const float32_t[::1] X_data_32
cdef const int32_t[::1] X_indices
cdef const int32_t[::1] X_indptr
if is_sparse:
X_indices = X.indices
X_indptr = X.indptr
if X_has_type_float64:
X_data_64 = X.data
X_data_bytes_ptr = <char *> &X_data_64[0]
else:
X_data_32 = X.data
X_data_bytes_ptr = <char *> &X_data_32[0]
problem = csr_set_problem(
X_data_bytes_ptr,
X_has_type_float64,
<char *> &X_indices[0],
<char *> &X_indptr[0],
(<int32_t>X.shape[0]),
(<int32_t>X.shape[1]),
(<int32_t>X.nnz),
bias,
<char *> &sample_weight[0],
<char *> &Y[0]
)
else:
X_as_1d_array = X.reshape(-1)
if X_has_type_float64:
X_data_64 = X_as_1d_array
X_data_bytes_ptr = <char *> &X_data_64[0]
else:
X_data_32 = X_as_1d_array
X_data_bytes_ptr = <char *> &X_data_32[0]
problem = set_problem(
X_data_bytes_ptr,
X_has_type_float64,
(<int32_t>X.shape[0]),
(<int32_t>X.shape[1]),
(<int32_t>np.count_nonzero(X)),
bias,
<char *> &sample_weight[0],
<char *> &Y[0]
)
cdef int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.intc)
param = set_parameter(
solver_type,
eps,
C,
class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
epsilon
)
error_msg = check_parameter(problem, param)
if error_msg:
free_problem(problem)
free_parameter(param)
raise ValueError(error_msg)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
blas_functions.axpy = _axpy[double]
blas_functions.scal = _scal[double]
blas_functions.nrm2 = _nrm2[double]
# early return
with nogil:
model = train(problem, param, &blas_functions)
# FREE
free_problem(problem)
free_parameter(param)
# destroy_param(param) don't call this or it will destroy class_weight_label and class_weight
# coef matrix holder created as fortran since that's what's used in liblinear
cdef float64_t[::1, :] w
cdef int nr_class = get_nr_class(model)
cdef int labels_ = nr_class
if nr_class == 2:
labels_ = 1
cdef int32_t[::1] n_iter = np.zeros(labels_, dtype=np.intc)
get_n_iter(model, <int *> &n_iter[0])
cdef int nr_feature = get_nr_feature(model)
if bias > 0:
nr_feature = nr_feature + 1
if nr_class == 2 and solver_type != 4: # solver is not Crammer-Singer
w = np.empty((1, nr_feature), order='F')
copy_w(&w[0, 0], model, nr_feature)
else:
len_w = (nr_class) * nr_feature
w = np.empty((nr_class, nr_feature), order='F')
copy_w(&w[0, 0], model, len_w)
free_and_destroy_model(&model)
return w.base, n_iter.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
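For orientation, a hedged sketch of how this internal entry point might be driven from Python; in scikit-learn the real callers live in sklearn.svm._base, and every value below is an illustrative assumption. Solver code 1 is L2R_L2LOSS_SVC_DUAL in the linear.h enum.

# Illustrative call into the internal binding (all values are assumptions).
import numpy as np
from sklearn.svm import _liblinear

rng = np.random.RandomState(0)
X = np.ascontiguousarray(rng.rand(10, 3))  # dense float64, C-contiguous
Y = np.array([0.0, 1.0] * 5)               # float64 targets
w, n_iter = _liblinear.train_wrap(
    X, Y,
    False,          # is_sparse: dense path
    1,              # solver_type: L2R_L2LOSS_SVC_DUAL (see linear.h)
    1e-4,           # eps
    -1.0,           # bias < 0 disables the bias term
    1.0,            # C
    np.empty(0),    # class_weight: empty means uniform
    1000,           # max_iter
    0,              # random_seed
    0.1,            # epsilon (p), only used by SVR solvers
    np.ones(10),    # sample_weight
)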
@@ -0,0 +1,75 @@
################################################################################
# Includes
from ..utils._typedefs cimport intp_t
cdef extern from "_svm_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
cdef extern from "svm.h":
cdef struct svm_node
cdef struct svm_model
cdef struct svm_parameter:
int svm_type
int kernel_type
int degree # for poly
double gamma # for poly/rbf/sigmoid
double coef0 # for poly/sigmoid
# these are for training only
double cache_size # in MB
double eps # stopping criteria
double C # for C_SVC, EPSILON_SVR and NU_SVR
int nr_weight # for C_SVC
int *weight_label # for C_SVC
double* weight # for C_SVC
double nu # for NU_SVC, ONE_CLASS, and NU_SVR
double p # for EPSILON_SVR
int shrinking # use the shrinking heuristics
int probability # do probability estimates
int max_iter # ceiling on Solver runtime
int random_seed # seed for random generator in probability estimation
cdef struct svm_problem:
int l
double *y
svm_node *x
double *W # instance weights
char *svm_check_parameter(svm_problem *, svm_parameter *)
svm_model *svm_train(svm_problem *, svm_parameter *, int *, BlasFunctions *) nogil
void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
void svm_cross_validation(svm_problem *, svm_parameter *, int nr_fold, double *target, BlasFunctions *) nogil
cdef extern from "libsvm_helper.c":
# this file contains methods for accessing libsvm 'hidden' fields
svm_node **dense_to_sparse (char *, intp_t *)
void set_parameter (svm_parameter *, int , int , int , double, double ,
double , double , double , double,
double, int, int, int, char *, char *, int,
int)
void set_problem (svm_problem *, char *, char *, char *, intp_t *, int)
svm_model *set_model (svm_parameter *, int, char *, intp_t *,
char *, intp_t *, intp_t *, char *,
char *, char *, char *, char *)
void copy_sv_coef (char *, svm_model *)
void copy_n_iter (char *, svm_model *)
void copy_intercept (char *, svm_model *, intp_t *)
void copy_SV (char *, svm_model *, intp_t *)
int copy_support (char *data, svm_model *model)
int copy_predict (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil
int copy_predict_proba (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil
int copy_predict_values(char *, svm_model *, intp_t *, char *, int, BlasFunctions *) nogil
void copy_nSV (char *, svm_model *)
void copy_probA (char *, svm_model *, intp_t *)
void copy_probB (char *, svm_model *, intp_t *)
intp_t get_l (svm_model *)
intp_t get_nr (svm_model *)
int free_problem (svm_problem *)
int free_model (svm_model *)
void set_verbosity(int)
@@ -0,0 +1,917 @@
"""
Binding for libsvm_skl
----------------------
These are the bindings for libsvm_skl, which is a fork of libsvm[1]
that adds to libsvm some capabilities, like index of support vectors
and efficient representation of dense matrices.
These are low-level routines, but can be used for flexibility or
performance reasons. See sklearn.svm for a higher-level API.
Low-level memory management is done in libsvm_helper.c. If we happen
to run out of memory a MemoryError will be raised. In practice this is
not very helpful, since malloc most likely fails inside svm.cpp,
where no memory checks are done.
[1] https://www.csie.ntu.edu.tw/~cjlin/libsvm/
Notes
-----
The signature mode='c' is somewhat superficial, since we already
check that arrays are C-contiguous in svm.py
Authors
-------
2010: Fabian Pedregosa <fabian.pedregosa@inria.fr>
Gael Varoquaux <gael.varoquaux@normalesup.org>
"""
import numpy as np
from libc.stdlib cimport free
from ..utils._cython_blas cimport _dot
from ..utils._typedefs cimport float64_t, int32_t, intp_t
include "_libsvm.pxi"
cdef extern from *:
ctypedef struct svm_parameter:
pass
################################################################################
# Internal variables
LIBSVM_KERNEL_TYPES = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
################################################################################
# Wrapper functions
def fit(
const float64_t[:, ::1] X,
const float64_t[::1] Y,
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
double tol=1e-3,
double C=1.0,
double nu=0.5,
double epsilon=0.1,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
int shrinking=1,
int probability=0,
double cache_size=100.,
int max_iter=-1,
int random_seed=0,
):
"""
Train the model using libsvm (low-level method)
Parameters
----------
X : array-like, dtype=float64 of shape (n_samples, n_features)
Y : array, dtype=float64 of shape (n_samples,)
target vector
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0
Independent parameter in poly/sigmoid kernel.
tol : float64, default=1e-3
Numeric stopping criterion (WRITEME).
C : float64, default=1
C parameter in C-Support Vector Classification.
nu : float64, default=0.5
An upper bound on the fraction of training errors and a lower bound of
the fraction of support vectors. Should be in the interval (0, 1].
epsilon : double, default=0.1
Epsilon parameter in the epsilon-insensitive loss function.
class_weight : array, dtype=float64, shape (n_classes,), \
default=np.empty(0)
Set the parameter C of class i to class_weight[i]*C for
SVC. If not given, all classes are supposed to have
weight one.
sample_weight : array, dtype=float64, shape (n_samples,), \
default=np.empty(0)
Weights assigned to each sample.
shrinking : int, default=1
Whether to use the shrinking heuristic.
probability : int, default=0
Whether to enable probability estimates.
cache_size : float64, default=100
Cache size for gram matrix columns (in megabytes).
max_iter : int (-1 for no limit), default=-1
Stop solver after this many iterations regardless of accuracy
(XXX Currently there is no API to know whether this kicked in.)
random_seed : int, default=0
Seed for the random number generator used for probability estimates.
Returns
-------
support : array of shape (n_support,)
Index of support vectors.
support_vectors : array of shape (n_support, n_features)
Support vectors (equivalent to X[support]). Will return an
empty array in the case of precomputed kernel.
n_class_SV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2,)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates, empty array for probability=False.
n_iter : ndarray of shape (max(1, (n_class * (n_class - 1) // 2)),)
Number of iterations run by the optimization routine to fit the model.
"""
cdef svm_parameter param
cdef svm_problem problem
cdef svm_model *model
cdef const char *error_msg
cdef intp_t SV_len
if len(sample_weight) == 0:
sample_weight = np.ones(X.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == X.shape[0], (
f"sample_weight and X have incompatible shapes: sample_weight has "
f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
)
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_problem(
&problem,
<char*> &X[0, 0],
<char*> &Y[0],
<char*> &sample_weight[0],
<intp_t*> X.shape,
kernel_index,
)
if problem.x == NULL:
raise MemoryError("Seems we've run out of memory")
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
set_parameter(
&param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
epsilon,
shrinking,
probability,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
)
error_msg = svm_check_parameter(&problem, &param)
if error_msg:
# for SVR: epsilon is called p in libsvm
error_repl = error_msg.decode('utf-8').replace("p < 0", "epsilon < 0")
raise ValueError(error_repl)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# this does the real work
cdef int fit_status = 0
with nogil:
model = svm_train(&problem, &param, &fit_status, &blas_functions)
# from here until the end, we just copy the data returned by
# svm_train
SV_len = get_l(model)
n_class = get_nr(model)
cdef int[::1] n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc)
copy_n_iter(<char*> &n_iter[0], model)
cdef float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64)
copy_sv_coef(<char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model)
# the intercept is just model.rho but with sign changed
cdef float64_t[::1] intercept = np.empty(
int((n_class*(n_class-1))/2), dtype=np.float64
)
copy_intercept(<char*> &intercept[0], model, <intp_t*> intercept.shape)
cdef int32_t[::1] support = np.empty(SV_len, dtype=np.int32)
copy_support(<char*> &support[0] if support.size > 0 else NULL, model)
# copy model.SV
cdef float64_t[:, ::1] support_vectors
if kernel_index == 4:
# precomputed kernel
support_vectors = np.empty((0, 0), dtype=np.float64)
else:
support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64)
copy_SV(
<char*> &support_vectors[0, 0] if support_vectors.size > 0 else NULL,
model,
<intp_t*> support_vectors.shape,
)
cdef int32_t[::1] n_class_SV
if svm_type == 0 or svm_type == 1:
n_class_SV = np.empty(n_class, dtype=np.int32)
copy_nSV(<char*> &n_class_SV[0] if n_class_SV.size > 0 else NULL, model)
else:
# OneClass and SVR are considered to have 2 classes
n_class_SV = np.array([SV_len, SV_len], dtype=np.int32)
cdef float64_t[::1] probA
cdef float64_t[::1] probB
if probability != 0:
if svm_type < 2: # SVC and NuSVC
probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64)
probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64)
copy_probB(<char*> &probB[0], model, <intp_t*> probB.shape)
else:
probA = np.empty(1, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
copy_probA(<char*> &probA[0], model, <intp_t*> probA.shape)
else:
probA = np.empty(0, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
svm_free_and_destroy_model(&model)
free(problem.x)
return (
support.base,
support_vectors.base,
n_class_SV.base,
sv_coef.base,
intercept.base,
probA.base,
probB.base,
fit_status,
n_iter.base,
)
cdef void set_predict_params(
svm_parameter *param,
int svm_type,
kernel,
int degree,
double gamma,
double coef0,
double cache_size,
int probability,
int nr_weight,
char *weight_label,
char *weight,
) except *:
"""Fill param with prediction time-only parameters."""
# training-time only parameters
cdef double C = 0.0
cdef double epsilon = 0.1
cdef int max_iter = 0
cdef double nu = 0.5
cdef int shrinking = 0
cdef double tol = 0.1
cdef int random_seed = -1
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_parameter(
param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
epsilon,
shrinking,
probability,
nr_weight,
weight_label,
weight,
max_iter,
random_seed,
)
def predict(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
const float64_t[:, ::1] sv_coef,
const float64_t[::1] intercept,
const float64_t[::1] probA=np.empty(0),
const float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict target values of X given a model (low-level method)
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
support : array of shape (n_support,)
Index of support vectors in training set.
SV : array of shape (n_support, n_features)
Support vectors.
nSV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef int rv
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
0,
<int>class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0] if support.size > 0 else NULL,
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL,
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# TODO: use check_model
try:
dec_values = np.empty(X.shape[0])
with nogil:
rv = copy_predict(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def predict_proba(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
float64_t[:, ::1] sv_coef,
float64_t[::1] intercept,
float64_t[::1] probA=np.empty(0),
float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
float64_t[::1] class_weight=np.empty(0),
float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict probabilities
svm_model stores all parameters needed to predict a given value.
For speed, all real work is done at the C level in function
copy_predict (libsvm_helper.c).
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
See sklearn.svm.predict for a complete list of parameters.
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
support : array of shape (n_support,)
Index of support vectors in training set.
SV : array of shape (n_support, n_features)
Support vectors.
nSV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2,)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
cdef int rv
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
1,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0],
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0],
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
cdef intp_t n_class = get_nr(model)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
with nogil:
rv = copy_predict_proba(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0, 0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def decision_function(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
const float64_t[:, ::1] sv_coef,
const float64_t[::1] intercept,
const float64_t[::1] probA=np.empty(0),
const float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict margin (libsvm name for this is predict_values)
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
Parameters
----------
X : array-like, dtype=float, size=[n_samples, n_features]
support : array, shape=[n_support]
Index of support vectors in training set.
SV : array, shape=[n_support, n_features]
Support vectors.
nSV : array, shape=[n_class]
Number of support vectors in each class.
sv_coef : array, shape=[n_class-1, n_support]
Coefficients of support vectors in decision function.
intercept : array, shape=[n_class*(n_class-1)/2]
Intercept in decision function.
probA, probB : array, shape=[n_class*(n_class-1)/2]
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, optional
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively. 0 by default.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, optional
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed. 'rbf' by default.
degree : int32, optional
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial), 3 by default.
gamma : float64, optional
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels. 0.1 by default.
coef0 : float64, optional
Independent parameter in poly/sigmoid kernel. 0 by default.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef intp_t n_class
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
cdef int rv
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
0,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0],
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0],
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
if svm_type > 1:
n_class = 1
else:
n_class = get_nr(model)
n_class = n_class * (n_class - 1) // 2
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
with nogil:
rv = copy_predict_values(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0, 0],
n_class,
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def cross_validation(
const float64_t[:, ::1] X,
const float64_t[::1] Y,
int n_fold,
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
double tol=1e-3,
double C=1.0,
double nu=0.5,
double epsilon=0.1,
float64_t[::1] class_weight=np.empty(0),
float64_t[::1] sample_weight=np.empty(0),
int shrinking=0,
int probability=0,
double cache_size=100.0,
int max_iter=-1,
int random_seed=0,
):
"""
Binding of the cross-validation routine (low-level routine)
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
Y : array, dtype=float of shape (n_samples,)
target vector
n_fold : int32
Number of folds for cross validation.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default='rbf'
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
tol : float64, default=1e-3
Numeric stopping criterion (WRITEME).
C : float64, default=1
C parameter in C-Support Vector Classification.
nu : float64, default=0.5
An upper bound on the fraction of training errors and a lower bound of
the fraction of support vectors. Should be in the interval (0, 1].
epsilon : double, default=0.1
Epsilon parameter in the epsilon-insensitive loss function.
class_weight : array, dtype=float64, shape (n_classes,), \
default=np.empty(0)
Set the parameter C of class i to class_weight[i]*C for
SVC. If not given, all classes are supposed to have
weight one.
sample_weight : array, dtype=float64, shape (n_samples,), \
default=np.empty(0)
Weights assigned to each sample.
shrinking : int, default=1
Whether to use the shrinking heuristic.
probability : int, default=0
Whether to enable probability estimates.
cache_size : float64, default=100
Cache size for gram matrix columns (in megabytes).
max_iter : int (-1 for no limit), default=-1
Stop solver after this many iterations regardless of accuracy
(XXX Currently there is no API to know whether this kicked in.)
random_seed : int, default=0
Seed for the random number generator used for probability estimates.
Returns
-------
target : array of shape (n_samples,), dtype=float64
    Cross-validation prediction for each sample.
"""
cdef svm_parameter param
cdef svm_problem problem
cdef const char *error_msg
if len(sample_weight) == 0:
sample_weight = np.ones(X.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == X.shape[0], (
f"sample_weight and X have incompatible shapes: sample_weight has "
f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
)
if X.shape[0] < n_fold:
raise ValueError("Number of samples is less than number of folds")
# set problem
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_problem(
&problem,
<char*> &X[0, 0],
<char*> &Y[0],
<char*> &sample_weight[0] if sample_weight.size > 0 else NULL,
<intp_t*> X.shape,
kernel_index,
)
if problem.x == NULL:
raise MemoryError("Seems we've run out of memory")
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
# set parameters
set_parameter(
&param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
tol,
shrinking,
probability,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
)
error_msg = svm_check_parameter(&problem, &param)
if error_msg:
raise ValueError(error_msg)
cdef float64_t[::1] target
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
target = np.empty((X.shape[0]), dtype=np.float64)
with nogil:
svm_cross_validation(
&problem,
&param,
n_fold,
<double *> &target[0],
&blas_functions,
)
finally:
free(problem.x)
return target.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
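For orientation, a hedged round trip through the low-level binding above; real callers are the sklearn.svm._base estimators, and the data here is made up.

# Illustrative fit/predict round trip (assumed toy data).
import numpy as np
from sklearn.svm import _libsvm

rng = np.random.RandomState(0)
X = np.ascontiguousarray(rng.rand(20, 2))
Y = np.ascontiguousarray((X[:, 0] > 0.5).astype(np.float64))
(support, SV, nSV, sv_coef, intercept,
 probA, probB, fit_status, n_iter) = _libsvm.fit(X, Y, svm_type=0, kernel="rbf")
pred = _libsvm.predict(X, support, SV, nSV, sv_coef, intercept,
                       svm_type=0, kernel="rbf")
print((pred == Y).mean())  # training accuracy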
@@ -0,0 +1,550 @@
import numpy as np
from scipy import sparse
from ..utils._cython_blas cimport _dot
from ..utils._typedefs cimport float64_t, int32_t, intp_t
cdef extern from *:
ctypedef char* const_char_p "const char*"
################################################################################
# Includes
cdef extern from "_svm_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
cdef extern from "svm.h":
cdef struct svm_csr_node
cdef struct svm_csr_model
cdef struct svm_parameter
cdef struct svm_csr_problem
char *svm_csr_check_parameter(svm_csr_problem *, svm_parameter *)
svm_csr_model *svm_csr_train(svm_csr_problem *, svm_parameter *, int *, BlasFunctions *) nogil
void svm_csr_free_and_destroy_model(svm_csr_model** model_ptr_ptr)
cdef extern from "libsvm_sparse_helper.c":
# this file contains methods for accessing libsvm 'hidden' fields
svm_csr_problem * csr_set_problem (
char *, intp_t *, char *, intp_t *, char *, char *, char *, int)
svm_csr_model *csr_set_model(svm_parameter *param, int nr_class,
char *SV_data, intp_t *SV_indices_dims,
char *SV_indices, intp_t *SV_intptr_dims,
char *SV_intptr,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
svm_parameter *set_parameter (int , int , int , double, double ,
double , double , double , double,
double, int, int, int, char *, char *, int,
int)
void copy_sv_coef (char *, svm_csr_model *)
void copy_n_iter (char *, svm_csr_model *)
void copy_support (char *, svm_csr_model *)
void copy_intercept (char *, svm_csr_model *, intp_t *)
int copy_predict (char *, svm_csr_model *, intp_t *, char *, BlasFunctions *)
int csr_copy_predict_values (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, int nr_class, BlasFunctions *)
int csr_copy_predict (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, BlasFunctions *) nogil
int csr_copy_predict_proba (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, BlasFunctions *) nogil
int copy_predict_values(char *, svm_csr_model *, intp_t *, char *, int, BlasFunctions *)
int csr_copy_SV (char *values, intp_t *n_indices,
char *indices, intp_t *n_indptr, char *indptr,
svm_csr_model *model, int n_features)
intp_t get_nonzero_SV (svm_csr_model *)
void copy_nSV (char *, svm_csr_model *)
void copy_probA (char *, svm_csr_model *, intp_t *)
void copy_probB (char *, svm_csr_model *, intp_t *)
intp_t get_l (svm_csr_model *)
intp_t get_nr (svm_csr_model *)
int free_problem (svm_csr_problem *)
int free_model (svm_csr_model *)
int free_param (svm_parameter *)
int free_model_SV(svm_csr_model *model)
void set_verbosity(int)
def libsvm_sparse_train(
    int n_features,
    const float64_t[::1] values,
    const int32_t[::1] indices,
    const int32_t[::1] indptr,
    const float64_t[::1] Y,
    int svm_type,
    int kernel_type,
    int degree,
    double gamma,
    double coef0,
    double eps,
    double C,
    const float64_t[::1] class_weight,
    const float64_t[::1] sample_weight,
    double nu,
    double cache_size,
    double p,
    int shrinking,
    int probability,
    int max_iter,
    int random_seed,
):
"""
Wrap svm_train from libsvm using a scipy.sparse.csr matrix
Work in progress.
Parameters
----------
n_features : number of features.
XXX: can we retrieve this from any other parameter?
X : array-like, dtype=float, size=[N, D]
Y : array, dtype=float, size=[N]
target vector
...
Notes
-----
See sklearn.svm.predict for a complete list of parameters.
"""
cdef svm_parameter *param
cdef svm_csr_problem *problem
cdef svm_csr_model *model
cdef const_char_p error_msg
if len(sample_weight) == 0:
sample_weight = np.ones(Y.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == indptr.shape[0] - 1, (
    f"sample_weight and X have incompatible shapes: sample_weight has "
    f"{sample_weight.shape[0]} samples while X has {indptr.shape[0] - 1}"
)
# we should never end up here with a precomputed kernel matrix,
# as this is always dense.
assert(kernel_type != 4)
# set libsvm problem
problem = csr_set_problem(
<char *> &values[0],
<intp_t *> indices.shape,
<char *> &indices[0],
<intp_t *> indptr.shape,
<char *> &indptr[0],
<char *> &Y[0],
<char *> &sample_weight[0],
kernel_type,
)
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
# set parameters
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
cache_size,
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL, max_iter,
random_seed,
)
# check parameters
if (param == NULL or problem == NULL):
raise MemoryError("Seems we've run out of memory")
error_msg = svm_csr_check_parameter(problem, param)
if error_msg:
free_problem(problem)
free_param(param)
raise ValueError(error_msg)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# call svm_train, this does the real work
cdef int fit_status = 0
with nogil:
model = svm_csr_train(problem, param, &fit_status, &blas_functions)
cdef intp_t SV_len = get_l(model)
cdef intp_t n_class = get_nr(model)
cdef int[::1] n_iter
n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc)
copy_n_iter(<char *> &n_iter[0], model)
# copy model.sv_coef
# we create a new array instead of resizing, otherwise
# it would not erase previous information
cdef float64_t[::1] sv_coef_data
sv_coef_data = np.empty((n_class-1)*SV_len, dtype=np.float64)
copy_sv_coef (<char *> &sv_coef_data[0] if sv_coef_data.size > 0 else NULL, model)
cdef int32_t[::1] support
support = np.empty(SV_len, dtype=np.int32)
copy_support(<char *> &support[0] if support.size > 0 else NULL, model)
# copy model.rho into the intercept
# the intercept is just model.rho but with sign changed
cdef float64_t[::1] intercept
intercept = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
copy_intercept (<char *> &intercept[0], model, <intp_t *> intercept.shape)
# copy model.SV
# we erase any previous information in SV
# TODO: custom kernel
cdef intp_t nonzero_SV
nonzero_SV = get_nonzero_SV (model)
cdef float64_t[::1] SV_data
cdef int32_t[::1] SV_indices, SV_indptr
SV_data = np.empty(nonzero_SV, dtype=np.float64)
SV_indices = np.empty(nonzero_SV, dtype=np.int32)
SV_indptr = np.empty(<intp_t>SV_len + 1, dtype=np.int32)
csr_copy_SV(
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
model,
n_features,
)
support_vectors_ = sparse.csr_matrix(
(SV_data, SV_indices, SV_indptr), (SV_len, n_features)
)
# copy model.nSV
# TODO: do only in classification
cdef int32_t[::1] n_class_SV
n_class_SV = np.empty(n_class, dtype=np.int32)
copy_nSV(<char *> &n_class_SV[0], model)
# copy probabilities
cdef float64_t[::1] probA, probB
if probability != 0:
if svm_type < 2: # SVC and NuSVC
probA = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
probB = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
copy_probB(<char *> &probB[0], model, <intp_t *> probB.shape)
else:
probA = np.empty(1, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
copy_probA(<char *> &probA[0], model, <intp_t *> probA.shape)
else:
probA = np.empty(0, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
svm_csr_free_and_destroy_model (&model)
free_problem(problem)
free_param(param)
return (
support.base,
support_vectors_,
sv_coef_data.base,
intercept.base,
n_class_SV.base,
probA.base,
probB.base,
fit_status,
n_iter.base,
)
def libsvm_sparse_predict(
    const float64_t[::1] T_data,
    const int32_t[::1] T_indices,
    const int32_t[::1] T_indptr,
    const float64_t[::1] SV_data,
    const int32_t[::1] SV_indices,
    const int32_t[::1] SV_indptr,
    const float64_t[::1] sv_coef,
    const float64_t[::1] intercept,
    int svm_type,
    int kernel_type,
    int degree,
    double gamma,
    double coef0,
    double eps,
    double C,
    const float64_t[:] class_weight,
    double nu,
    double p,
    int shrinking,
    int probability,
    const int32_t[::1] nSV,
    const float64_t[::1] probA,
    const float64_t[::1] probB,
):
"""
Predict values T given a model.
For speed, all real work is done at the C level in function
copy_predict (libsvm_helper.c).
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
See sklearn.svm.predict for a complete list of parameters.
Parameters
----------
X : array-like, dtype=float
Y : array
target vector
Returns
-------
dec_values : array
predicted values.
"""
cdef float64_t[::1] dec_values
cdef svm_parameter *param
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
cdef int rv
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1, # random seed has no effect on predict either
)
model = csr_set_model(
param, <int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *>SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
# TODO: use check_model
dec_values = np.empty(T_indptr.shape[0]-1)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
with nogil:
rv = csr_copy_predict(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def libsvm_sparse_predict_proba(
const float64_t[::1] T_data,
const int32_t[::1] T_indices,
const int32_t[::1] T_indptr,
const float64_t[::1] SV_data,
const int32_t[::1] SV_indices,
const int32_t[::1] SV_indptr,
const float64_t[::1] sv_coef,
const float64_t[::1] intercept,
int svm_type,
int kernel_type,
int degree,
double gamma,
double coef0,
double eps,
double C,
const float64_t[:] class_weight,
double nu,
double p,
int shrinking,
int probability,
const int32_t[::1] nSV,
const float64_t[::1] probA,
const float64_t[::1] probB,
):
"""
Predict values T given a model.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter *param
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1, # random seed has no effect on predict either
)
model = csr_set_model(
param,
<int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
# TODO: use check_model
cdef intp_t n_class = get_nr(model)
cdef int rv
dec_values = np.empty((T_indptr.shape[0]-1, n_class), dtype=np.float64)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
with nogil:
rv = csr_copy_predict_proba(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0, 0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def libsvm_sparse_decision_function(
const float64_t[::1] T_data,
const int32_t[::1] T_indices,
const int32_t[::1] T_indptr,
const float64_t[::1] SV_data,
const int32_t[::1] SV_indices,
const int32_t[::1] SV_indptr,
const float64_t[::1] sv_coef,
const float64_t[::1] intercept,
int svm_type,
int kernel_type,
int degree,
double gamma,
double coef0,
double eps,
double C,
const float64_t[:] class_weight,
double nu,
double p,
int shrinking,
int probability,
const int32_t[::1] nSV,
const float64_t[::1] probA,
const float64_t[::1] probB,
):
"""
Predict margin (libsvm name for this is predict_values)
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter *param
cdef intp_t n_class
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1,
)
model = csr_set_model(
param,
<int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
if svm_type > 1:
n_class = 1
else:
n_class = get_nr(model)
n_class = n_class * (n_class - 1) // 2
dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
if csr_copy_predict_values(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0, 0],
n_class,
&blas_functions,
) < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
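For orientation, a hedged sketch of training through the sparse binding above; scikit-learn's estimators call this internally, and every value below is an illustrative assumption (kernel_type 2 is libsvm's rbf code).

# Illustrative call into the sparse training binding (assumed data).
import numpy as np
from scipy import sparse
from sklearn.svm import _libsvm_sparse

X = sparse.random(30, 5, density=0.5, format="csr",
                  dtype=np.float64, random_state=0)
Y = np.asarray(np.random.RandomState(0).randint(0, 2, 30), dtype=np.float64)
(support, SV, sv_coef, intercept, nSV,
 probA, probB, fit_status, n_iter) = _libsvm_sparse.libsvm_sparse_train(
    X.shape[1],                      # n_features
    X.data,                          # CSR values (float64)
    X.indices.astype(np.int32),     # CSR column indices
    X.indptr.astype(np.int32),      # CSR row pointers
    Y,
    0, 2, 3, 0.5, 0.0,              # svm_type, kernel_type, degree, gamma, coef0
    1e-3, 1.0,                      # eps, C
    np.empty(0), np.empty(0),       # class_weight, sample_weight
    0.5, 100.0, 0.1,                # nu, cache_size, p
    1, 0, -1, 0,                    # shrinking, probability, max_iter, random_seed
)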
@@ -0,0 +1,13 @@
"""Wrapper for newrand.h"""
cdef extern from "newrand.h":
void set_seed(unsigned int)
unsigned int bounded_rand_int(unsigned int)
def set_seed_wrap(unsigned int custom_seed):
set_seed(custom_seed)
def bounded_rand_int_wrap(unsigned int range_):
return bounded_rand_int(range_)
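These two wrappers are how scikit-learn seeds the bounded RNG used by libsvm/liblinear for reproducible probability estimates; a hedged one-liner for orientation:

# Illustrative use of the newrand wrappers.
from sklearn.svm import _newrand
_newrand.set_seed_wrap(42)
print(_newrand.bounded_rand_int_wrap(10))  # uniform integer in [0, 10)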
@@ -0,0 +1,53 @@
newrand_include = include_directories('src/newrand')
libsvm_include = include_directories('src/libsvm')
liblinear_include = include_directories('src/liblinear')
_newrand = py.extension_module(
'_newrand',
'_newrand.pyx',
override_options: ['cython_language=cpp'],
include_directories: [newrand_include],
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
libsvm_skl = static_library(
'libsvm-skl',
['src/libsvm/libsvm_template.cpp'],
)
py.extension_module(
'_libsvm',
['_libsvm.pyx', utils_cython_tree],
include_directories: [newrand_include, libsvm_include],
link_with: libsvm_skl,
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
py.extension_module(
'_libsvm_sparse',
['_libsvm_sparse.pyx', utils_cython_tree],
include_directories: [newrand_include, libsvm_include],
link_with: libsvm_skl,
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
liblinear_skl = static_library(
'liblinear-skl',
['src/liblinear/linear.cpp', 'src/liblinear/tron.cpp'],
)
py.extension_module(
'_liblinear',
['_liblinear.pyx', utils_cython_tree],
include_directories: [newrand_include, liblinear_include],
link_with: [liblinear_skl],
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
@@ -0,0 +1,31 @@
Copyright (c) 2007-2014 The LIBLINEAR Project.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither name of copyright holders nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,16 @@
#ifndef _CYTHON_BLAS_HELPERS_H
#define _CYTHON_BLAS_HELPERS_H
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef void (*axpy_func)(int, double, const double*, int, double*, int);
typedef void (*scal_func)(int, double, const double*, int);
typedef double (*nrm2_func)(int, const double*, int);
typedef struct BlasFunctions{
dot_func dot;
axpy_func axpy;
scal_func scal;
nrm2_func nrm2;
} BlasFunctions;
#endif
@@ -0,0 +1,236 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "linear.h"
/*
* Convert matrix to sparse representation suitable for liblinear. x is
* expected to be an array of length n_samples*n_features.
*
* Whether the matrix is densely or sparsely populated, the fastest way to
* convert it to liblinear's sparse format is to calculate the amount of memory
* needed and allocate a single big block.
*
* Special care must be taken with indices, since liblinear indices start at 1
* and not at 0.
*
* If bias is > 0, we append an item at the end.
*/
static struct feature_node **dense_to_sparse(char *x, int double_precision,
int n_samples, int n_features, int n_nonzero, double bias)
{
float *x32 = (float *)x;
double *x64 = (double *)x;
struct feature_node **sparse;
int i, j; /* sample and feature indices */
struct feature_node *T; /* pointer to the top of the stack */
int have_bias = (bias > 0);
sparse = malloc (n_samples * sizeof(struct feature_node *));
if (sparse == NULL)
return NULL;
n_nonzero += (have_bias+1) * n_samples;
T = malloc (n_nonzero * sizeof(struct feature_node));
if (T == NULL) {
free(sparse);
return NULL;
}
for (i=0; i<n_samples; ++i) {
sparse[i] = T;
for (j=1; j<=n_features; ++j) {
if (double_precision) {
if (*x64 != 0) {
T->value = *x64;
T->index = j;
++ T;
}
++ x64; /* go to next element */
} else {
if (*x32 != 0) {
T->value = *x32;
T->index = j;
++ T;
}
++ x32; /* go to next element */
}
}
/* set bias element */
if (have_bias) {
T->value = bias;
T->index = j;
++ T;
}
/* set sentinel */
T->index = -1;
++ T;
}
return sparse;
}
/*
* Convert scipy.sparse.csr to liblinear's sparse data structure
*/
static struct feature_node **csr_to_sparse(char *x, int double_precision,
int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
double bias)
{
float *x32 = (float *)x;
double *x64 = (double *)x;
struct feature_node **sparse;
int i, j=0, k=0, n;
struct feature_node *T;
int have_bias = (bias > 0);
sparse = malloc (n_samples * sizeof(struct feature_node *));
if (sparse == NULL)
return NULL;
n_nonzero += (have_bias+1) * n_samples;
T = malloc (n_nonzero * sizeof(struct feature_node));
if (T == NULL) {
free(sparse);
return NULL;
}
for (i=0; i<n_samples; ++i) {
sparse[i] = T;
n = indptr[i+1] - indptr[i]; /* count elements in row i */
for (j=0; j<n; ++j) {
T->value = double_precision ? x64[k] : x32[k];
T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
++T;
++k;
}
if (have_bias) {
T->value = bias;
T->index = n_features + 1;
++T;
++j;
}
/* set sentinel */
T->index = -1;
++T;
}
return sparse;
}
struct problem * set_problem(char *X, int double_precision_X, int n_samples,
int n_features, int n_nonzero, double bias, char* sample_weight,
char *Y)
{
struct problem *problem;
/* not performant but simple */
problem = malloc(sizeof(struct problem));
if (problem == NULL) return NULL;
problem->l = n_samples;
problem->n = n_features + (bias > 0);
problem->y = (double *) Y;
problem->W = (double *) sample_weight;
problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
n_nonzero, bias);
problem->bias = bias;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
struct problem * csr_set_problem (char *X, int double_precision_X,
char *indices, char *indptr, int n_samples, int n_features,
int n_nonzero, double bias, char *sample_weight, char *Y)
{
struct problem *problem;
problem = malloc (sizeof (struct problem));
if (problem == NULL) return NULL;
problem->l = n_samples;
problem->n = n_features + (bias > 0);
problem->y = (double *) Y;
problem->W = (double *) sample_weight;
problem->x = csr_to_sparse(X, double_precision_X, (int *) indices,
(int *) indptr, n_samples, n_features, n_nonzero, bias);
problem->bias = bias;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
/* Create a parameter struct and return it */
struct parameter *set_parameter(int solver_type, double eps, double C,
Py_ssize_t nr_weight, char *weight_label,
char *weight, int max_iter, unsigned seed,
double epsilon)
{
struct parameter *param = malloc(sizeof(struct parameter));
if (param == NULL)
return NULL;
set_seed(seed);
param->solver_type = solver_type;
param->eps = eps;
param->C = C;
param->p = epsilon; // epsilon for epsilon-SVR
param->nr_weight = (int) nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->max_iter = max_iter;
return param;
}
void copy_w(void *data, struct model *model, int len)
{
memcpy(data, model->w, len * sizeof(double));
}
double get_bias(struct model *model)
{
return model->bias;
}
void free_problem(struct problem *problem)
{
free(problem->x[0]);
free(problem->x);
free(problem);
}
void free_parameter(struct parameter *param)
{
free(param);
}
/* rely on built-in facility to control verbose output */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s, stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
set_print_string_function(&print_string_stdout);
else
set_print_string_function(&print_null);
}
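To make the conversion conventions above concrete, a hedged Python model (illustrative only, not part of the build) of what csr_to_sparse emits per row: 1-based feature indices, an optional bias feature at index n_features + 1, and a -1 sentinel closing the row.

# Python sketch of liblinear's per-row sparse node layout.
def csr_row_to_nodes(data, indices, indptr, row, n_features, bias):
    nodes = [(int(indices[k]) + 1, float(data[k]))   # liblinear is 1-based
             for k in range(indptr[row], indptr[row + 1])]
    if bias > 0:
        nodes.append((n_features + 1, bias))         # synthetic bias feature
    nodes.append((-1, 0.0))                          # sentinel terminates the row
    return nodes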
File diff suppressed because it is too large.
@@ -0,0 +1,87 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "_cython_blas_helpers.h"
struct feature_node
{
int index;
double value;
};
struct problem
{
int l, n;
double *y;
struct feature_node **x;
double bias; /* < 0 if no bias term */
double *W;
};
enum /* solver_type */
{
	L2R_LR,
	L2R_L2LOSS_SVC_DUAL,
	L2R_L2LOSS_SVC,
	L2R_L1LOSS_SVC_DUAL,
	MCSVM_CS,
	L1R_L2LOSS_SVC,
	L1R_LR,
	L2R_LR_DUAL,
	L2R_L2LOSS_SVR = 11,
	L2R_L2LOSS_SVR_DUAL,
	L2R_L1LOSS_SVR_DUAL
};
struct parameter
{
int solver_type;
/* these are for training only */
double eps; /* stopping criteria */
double C;
int nr_weight;
int *weight_label;
double* weight;
int max_iter;
double p;
};
struct model
{
struct parameter param;
int nr_class; /* number of classes */
int nr_feature;
double *w;
int *label; /* label of each class */
double bias;
int *n_iter; /* no. of iterations of each class */
};
void set_seed(unsigned seed);
struct model* train(const struct problem *prob, const struct parameter *param, BlasFunctions *blas_functions);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);
int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);
void get_n_iter(const struct model *model_, int* n_iter);
#if 0
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
double get_decfun_bias(const struct model *model_, int label_idx);
#endif
void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);
const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
int check_regression_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));
#ifdef __cplusplus
}
#endif
#endif /* _LIBLINEAR_H */
@@ -0,0 +1,223 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "tron.h"
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
static void default_print(const char *buf)
{
fputs(buf,stdout);
fflush(stdout);
}
void TRON::info(const char *fmt,...)
{
char buf[BUFSIZ];
va_list ap;
va_start(ap,fmt);
	vsnprintf(buf,BUFSIZ,fmt,ap); /* bounded write: avoid overflowing buf */
va_end(ap);
(*tron_print_string)(buf);
}
TRON::TRON(const function *fun_obj, double eps, int max_iter, BlasFunctions *blas)
{
this->fun_obj=const_cast<function *>(fun_obj);
this->eps=eps;
this->max_iter=max_iter;
this->blas=blas;
tron_print_string = default_print;
}
TRON::~TRON()
{
}
int TRON::tron(double *w)
{
// Parameters for updating the iterates.
double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
// Parameters for updating the trust region size delta.
double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
int n = fun_obj->get_nr_variable();
int i, cg_iter;
double delta, snorm;
double alpha, f, fnew, prered, actred, gs;
int search = 1, iter = 1, inc = 1;
double *s = new double[n];
double *r = new double[n];
double *w_new = new double[n];
double *g = new double[n];
for (i=0; i<n; i++)
w[i] = 0;
f = fun_obj->fun(w);
fun_obj->grad(w, g);
delta = blas->nrm2(n, g, inc);
double gnorm1 = delta;
double gnorm = gnorm1;
if (gnorm <= eps*gnorm1)
search = 0;
iter = 1;
while (iter <= max_iter && search)
{
cg_iter = trcg(delta, g, s, r);
memcpy(w_new, w, sizeof(double)*n);
blas->axpy(n, 1.0, s, inc, w_new, inc);
gs = blas->dot(n, g, inc, s, inc);
prered = -0.5*(gs - blas->dot(n, s, inc, r, inc));
fnew = fun_obj->fun(w_new);
// Compute the actual reduction.
actred = f - fnew;
// On the first iteration, adjust the initial step bound.
snorm = blas->nrm2(n, s, inc);
if (iter == 1)
delta = min(delta, snorm);
// Compute prediction alpha*snorm of the step.
if (fnew - f - gs <= 0)
alpha = sigma3;
else
alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
// Update the trust region bound according to the ratio of actual to predicted reduction.
if (actred < eta0*prered)
delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
else if (actred < eta1*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
else if (actred < eta2*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
else
delta = max(delta, min(alpha*snorm, sigma3*delta));
info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
if (actred > eta0*prered)
{
iter++;
memcpy(w, w_new, sizeof(double)*n);
f = fnew;
fun_obj->grad(w, g);
gnorm = blas->nrm2(n, g, inc);
if (gnorm <= eps*gnorm1)
break;
}
if (f < -1.0e+32)
{
info("WARNING: f < -1.0e+32\n");
break;
}
if (fabs(actred) <= 0 && prered <= 0)
{
info("WARNING: actred and prered <= 0\n");
break;
}
if (fabs(actred) <= 1.0e-12*fabs(f) &&
fabs(prered) <= 1.0e-12*fabs(f))
{
info("WARNING: actred and prered too small\n");
break;
}
}
delete[] g;
delete[] r;
delete[] w_new;
delete[] s;
return --iter;
}
int TRON::trcg(double delta, double *g, double *s, double *r)
{
int i, inc = 1;
int n = fun_obj->get_nr_variable();
double *d = new double[n];
double *Hd = new double[n];
double rTr, rnewTrnew, alpha, beta, cgtol;
for (i=0; i<n; i++)
{
s[i] = 0;
r[i] = -g[i];
d[i] = r[i];
}
cgtol = 0.1 * blas->nrm2(n, g, inc);
int cg_iter = 0;
rTr = blas->dot(n, r, inc, r, inc);
while (1)
{
if (blas->nrm2(n, r, inc) <= cgtol)
break;
cg_iter++;
fun_obj->Hv(d, Hd);
alpha = rTr / blas->dot(n, d, inc, Hd, inc);
blas->axpy(n, alpha, d, inc, s, inc);
if (blas->nrm2(n, s, inc) > delta)
{
info("cg reaches trust region boundary\n");
alpha = -alpha;
blas->axpy(n, alpha, d, inc, s, inc);
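			// Solve ||s + alpha*d|| = delta for alpha >= 0, i.e. the quadratic
			// dtd*alpha^2 + 2*std*alpha + (sts - dsq) = 0; the two branches
			// below pick the numerically stable form of the positive root.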
double std = blas->dot(n, s, inc, d, inc);
double sts = blas->dot(n, s, inc, s, inc);
double dtd = blas->dot(n, d, inc, d, inc);
double dsq = delta*delta;
double rad = sqrt(std*std + dtd*(dsq-sts));
if (std >= 0)
alpha = (dsq - sts)/(std + rad);
else
alpha = (rad - std)/dtd;
blas->axpy(n, alpha, d, inc, s, inc);
alpha = -alpha;
blas->axpy(n, alpha, Hd, inc, r, inc);
break;
}
alpha = -alpha;
blas->axpy(n, alpha, Hd, inc, r, inc);
rnewTrnew = blas->dot(n, r, inc, r, inc);
beta = rnewTrnew/rTr;
blas->scal(n, beta, d, inc);
blas->axpy(n, 1.0, r, inc, d, inc);
rTr = rnewTrnew;
}
delete[] d;
delete[] Hd;
return(cg_iter);
}
double TRON::norm_inf(int n, double *x)
{
double dmax = fabs(x[0]);
for (int i=1; i<n; i++)
if (fabs(x[i]) >= dmax)
dmax = fabs(x[i]);
return(dmax);
}
void TRON::set_print_string(void (*print_string) (const char *buf))
{
tron_print_string = print_string;
}
@@ -0,0 +1,37 @@
#ifndef _TRON_H
#define _TRON_H
#include "_cython_blas_helpers.h"
class function
{
public:
virtual double fun(double *w) = 0 ;
virtual void grad(double *w, double *g) = 0 ;
virtual void Hv(double *s, double *Hs) = 0 ;
virtual int get_nr_variable(void) = 0 ;
virtual ~function(void){}
};
class TRON
{
public:
TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0);
~TRON();
int tron(double *w);
void set_print_string(void (*i_print) (const char *buf));
private:
int trcg(double delta, double *g, double *s, double *r);
double norm_inf(int n, double *x);
double eps;
int max_iter;
function *fun_obj;
BlasFunctions *blas;
void info(const char *fmt,...);
void (*tron_print_string)(const char *buf);
};
#endif
@@ -0,0 +1,11 @@
Changes to Libsvm
This is here mainly as a checklist for incorporating new versions of libsvm.
* Add copyright to files svm.cpp and svm.h
* Add random_seed support and call to srand in fit function
* Improved random number generator (fix on Windows, enhancement on other
  platforms). See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
* Invoke the SciPy BLAS API for the svm kernel function to improve performance,
  with a speedup of 1.5x to 2x for dense data only. See <https://github.com/scikit-learn/scikit-learn/pull/16530>
* Expose the number of iterations run in optimization. See <https://github.com/scikit-learn/scikit-learn/pull/21408>
The changes made with respect to upstream are detailed in the header comment of svm.cpp.
@@ -0,0 +1,9 @@
#ifndef _SVM_CYTHON_BLAS_HELPERS_H
#define _SVM_CYTHON_BLAS_HELPERS_H
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef struct BlasFunctions{
dot_func dot;
} BlasFunctions;
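/*
 * Usage sketch (illustrative only, not part of the build): the caller is
 * expected to fill the struct with a BLAS dot implementation and pass it
 * down to the svm routines, e.g.
 *
 *   double my_dot(int n, const double *x, int incx,
 *                 const double *y, int incy);  // hypothetical helper
 *   BlasFunctions blas;
 *   blas.dot = my_dot;
 *
 * `my_dot` is a placeholder name, not an actual API.
 */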
#endif
@@ -0,0 +1,425 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Some helper methods for libsvm bindings.
*
* We need to access from python some parameters stored in svm_model
* but libsvm does not expose this structure, so we define it here
* along some utilities to convert from numpy arrays.
*
* License: BSD 3 clause
*
* Author: 2010 Fabian Pedregosa <fabian.pedregosa@inria.fr>
*/
/*
* Convert matrix to sparse representation suitable for libsvm. x is
* expected to be an array of length nrow*ncol.
*
* Typically the matrix will be dense, so we speed up the routine for
* this case. We create a temporary array temp that collects non-zero
* elements and after we just memcpy that to the proper array.
*
 * Special care must be taken with indices, since libsvm indices start
* at 1 and not at 0.
*
* Strictly speaking, the C standard does not require that structs are
 * contiguous, but in practice it's a reasonable assumption.
*
*/
struct svm_node *dense_to_libsvm (double *x, Py_ssize_t *dims)
{
struct svm_node *node;
Py_ssize_t len_row = dims[1];
double *tx = x;
int i;
node = malloc (dims[0] * sizeof(struct svm_node));
if (node == NULL) return NULL;
for (i=0; i<dims[0]; ++i) {
node[i].values = tx;
node[i].dim = (int) len_row;
node[i].ind = i; /* only used if kernel=precomputed, but not
too much overhead */
tx += len_row;
}
return node;
}
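/*
 * Illustrative sketch (not compiled): for a dense 2x3 row-major matrix
 * x = {1,0,2, 0,3,0} with dims = {2,3}, the function yields
 *   node[0] = {dim=3, ind=0, values=&x[0]}
 *   node[1] = {dim=3, ind=1, values=&x[3]}
 * No per-element copy is made; each node points into the original row,
 * which is why this routine is cheap for dense input.
 */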
/*
* Fill an svm_parameter struct.
*/
void set_parameter(struct svm_parameter *param, int svm_type, int kernel_type, int degree,
double gamma, double coef0, double nu, double cache_size, double C,
double eps, double p, int shrinking, int probability, int nr_weight,
char *weight_label, char *weight, int max_iter, int random_seed)
{
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->coef0 = coef0;
param->nu = nu;
param->cache_size = cache_size;
param->C = C;
param->eps = eps;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
param->nr_weight = nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->gamma = gamma;
param->max_iter = max_iter;
param->random_seed = random_seed;
}
/*
* Fill an svm_problem struct. problem->x will be malloc'd.
*/
void set_problem(struct svm_problem *problem, char *X, char *Y, char *sample_weight, Py_ssize_t *dims, int kernel_type)
{
if (problem == NULL) return;
problem->l = (int) dims[0]; /* number of samples */
problem->y = (double *) Y;
problem->x = dense_to_libsvm((double *) X, dims); /* implicit call to malloc */
problem->W = (double *) sample_weight;
}
/*
* Create and return an instance of svm_model.
*
* The copy of model->sv_coef should be straightforward, but
* unfortunately to represent a matrix numpy and libsvm use different
* approaches, so it requires some iteration.
*
* Possible issue: on 64 bits, the number of columns that numpy can
* store is a long, but libsvm enforces this number (model->l) to be
* an int, so we might have numpy matrices that do not fit into libsvm's
* data structure.
*
*/
struct svm_model *set_model(struct svm_parameter *param, int nr_class,
char *SV, Py_ssize_t *SV_dims,
char *support, Py_ssize_t *support_dims,
Py_ssize_t *sv_coef_strides,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
{
struct svm_model *model;
double *dsv_coef = (double *) sv_coef;
int i, m;
m = nr_class * (nr_class-1)/2;
if ((model = malloc(sizeof(struct svm_model))) == NULL)
goto model_error;
if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
goto nsv_error;
if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
goto label_error;
if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
goto sv_coef_error;
if ((model->rho = malloc( m * sizeof(double))) == NULL)
goto rho_error;
// This is only allocated in dynamic memory while training.
model->n_iter = NULL;
model->nr_class = nr_class;
model->param = *param;
model->l = (int) support_dims[0];
if (param->kernel_type == PRECOMPUTED) {
if ((model->SV = malloc ((model->l) * sizeof(struct svm_node))) == NULL)
goto SV_error;
for (i=0; i<model->l; ++i) {
model->SV[i].ind = ((int *) support)[i];
model->SV[i].values = NULL;
}
} else {
model->SV = dense_to_libsvm((double *) SV, SV_dims);
}
/*
 * regression and one-class do not use nSV, label.
* TODO: does this provoke memory leaks (we just malloc'ed them)?
*/
if (param->svm_type < 2) {
memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
for(i=0; i < model->nr_class; i++)
model->label[i] = i;
}
for (i=0; i < model->nr_class-1; i++) {
model->sv_coef[i] = dsv_coef + i*(model->l);
}
for (i=0; i<m; ++i) {
(model->rho)[i] = -((double *) rho)[i];
}
/*
* just to avoid segfaults, these features are not wrapped but
* svm_destroy_model will try to free them.
*/
if (param->probability) {
if ((model->probA = malloc(m * sizeof(double))) == NULL)
goto probA_error;
memcpy(model->probA, probA, m * sizeof(double));
if ((model->probB = malloc(m * sizeof(double))) == NULL)
goto probB_error;
memcpy(model->probB, probB, m * sizeof(double));
} else {
model->probA = NULL;
model->probB = NULL;
}
/* We'll free SV ourselves */
model->free_sv = 0;
return model;
probB_error:
free(model->probA);
probA_error:
free(model->SV);
SV_error:
free(model->rho);
rho_error:
free(model->sv_coef);
sv_coef_error:
free(model->label);
label_error:
free(model->nSV);
nsv_error:
free(model);
model_error:
return NULL;
}
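/*
 * Illustrative sketch (not compiled): with nr_class = 3 and l = 4 support
 * vectors, the flat numpy buffer of dual coefficients has length
 * (3-1)*4 = 8 and is exposed to libsvm as nr_class-1 row pointers:
 *   model->sv_coef[0] = dsv_coef + 0
 *   model->sv_coef[1] = dsv_coef + 4
 * No coefficient data is copied; only the small array of row pointers is
 * allocated (see the loop over sv_coef in set_model above).
 */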
/*
* Get the number of support vectors in a model.
*/
Py_ssize_t get_l(struct svm_model *model)
{
return (Py_ssize_t) model->l;
}
/*
* Get the number of classes in a model, = 2 in regression/one class
* svm.
*/
Py_ssize_t get_nr(struct svm_model *model)
{
return (Py_ssize_t) model->nr_class;
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_model *model)
{
const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
 * Some helpers to convert from libsvm sparse data structures.
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_model *model)
{
int i, len = model->nr_class-1;
double *temp = (double *) data;
for(i=0; i<len; ++i) {
memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
temp += model->l;
}
}
void copy_intercept(char *data, struct svm_model *model, Py_ssize_t *dims)
{
/* intercept = -rho */
Py_ssize_t i, n = dims[0];
double t, *ddata = (double *) data;
for (i=0; i<n; ++i) {
t = model->rho[i];
/* we do this to avoid ugly -0.0 */
*ddata = (t != 0) ? -t : 0;
++ddata;
}
}
/*
* This is a bit more complex since SV are stored as sparse
* structures, so we have to do the conversion on the fly and also
* iterate fast over data.
*/
void copy_SV(char *data, struct svm_model *model, Py_ssize_t *dims)
{
int i, n = model->l;
double *tdata = (double *) data;
int dim = model->SV[0].dim;
for (i=0; i<n; ++i) {
memcpy (tdata, model->SV[i].values, dim * sizeof(double));
tdata += dim;
}
}
void copy_support (char *data, struct svm_model *model)
{
memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
 * copy svm_model.nSV, an array with the number of SV for each class;
 * it will be NULL in the case of SVR, OneClass
*/
void copy_nSV(char *data, struct svm_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
void copy_probA(char *data, struct svm_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probA, dims[0] * sizeof(double));
}
void copy_probB(char *data, struct svm_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Predict using model.
*
* It will return -1 if we run out of memory.
*/
int copy_predict(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
char *dec_values, BlasFunctions *blas_functions)
{
double *t = (double *) dec_values;
struct svm_node *predict_nodes;
Py_ssize_t i;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<predict_dims[0]; ++i) {
*t = svm_predict(model, &predict_nodes[i], blas_functions);
++t;
}
free(predict_nodes);
return 0;
}
int copy_predict_values(char *predict, struct svm_model *model,
Py_ssize_t *predict_dims, char *dec_values, int nr_class, BlasFunctions *blas_functions)
{
Py_ssize_t i;
struct svm_node *predict_nodes;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<predict_dims[0]; ++i) {
svm_predict_values(model, &predict_nodes[i],
((double *) dec_values) + i*nr_class,
blas_functions);
}
free(predict_nodes);
return 0;
}
int copy_predict_proba(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
char *dec_values, BlasFunctions *blas_functions)
{
Py_ssize_t i, n, m;
struct svm_node *predict_nodes;
n = predict_dims[0];
m = (Py_ssize_t) model->nr_class;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<n; ++i) {
svm_predict_probability(model, &predict_nodes[i],
((double *) dec_values) + i*m,
blas_functions);
}
free(predict_nodes);
return 0;
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
int free_model(struct svm_model *model)
{
/* like svm_free_and_destroy_model, but does not free sv_coef[i] */
if (model == NULL) return -1;
free(model->SV);
/* We don't free sv_ind and n_iter, since we did not create them in
set_model */
/* free(model->sv_ind);
* free(model->n_iter);
*/
free(model->sv_coef);
free(model->rho);
free(model->label);
free(model->probA);
free(model->probB);
free(model->nSV);
free(model);
return 0;
}
int free_param(struct svm_parameter *param)
{
if (param == NULL) return -1;
free(param);
return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s,stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
svm_set_print_string_function(&print_string_stdout);
else
svm_set_print_string_function(&print_null);
}
@@ -0,0 +1,472 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Convert scipy.sparse.csr to libsvm's sparse data structure
*/
struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, int n_samples)
{
struct svm_csr_node **sparse, *temp;
int i, j=0, k=0, n;
sparse = malloc (n_samples * sizeof(struct svm_csr_node *));
if (sparse == NULL)
return NULL;
for (i=0; i<n_samples; ++i) {
n = indptr[i+1] - indptr[i]; /* count elements in row i */
temp = malloc ((n+1) * sizeof(struct svm_csr_node));
if (temp == NULL) {
for (j=0; j<i; j++)
free(sparse[j]);
free(sparse);
return NULL;
}
for (j=0; j<n; ++j) {
temp[j].value = values[k];
temp[j].index = indices[k] + 1; /* libsvm uses 1-based indexing */
++k;
}
/* set sentinel */
temp[n].index = -1;
sparse[i] = temp;
}
return sparse;
}
struct svm_parameter * set_parameter(int svm_type, int kernel_type, int degree,
double gamma, double coef0, double nu, double cache_size, double C,
double eps, double p, int shrinking, int probability, int nr_weight,
char *weight_label, char *weight, int max_iter, int random_seed)
{
struct svm_parameter *param;
param = malloc(sizeof(struct svm_parameter));
if (param == NULL) return NULL;
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->coef0 = coef0;
param->nu = nu;
param->cache_size = cache_size;
param->C = C;
param->eps = eps;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
param->nr_weight = nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->gamma = gamma;
param->max_iter = max_iter;
param->random_seed = random_seed;
return param;
}
/*
 * Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix.
 * It is up to the user to free the resulting structure.
*
* TODO: precomputed kernel.
*/
struct svm_csr_problem * csr_set_problem (char *values, Py_ssize_t *n_indices,
char *indices, Py_ssize_t *n_indptr, char *indptr, char *Y,
char *sample_weight, int kernel_type) {
struct svm_csr_problem *problem;
problem = malloc (sizeof (struct svm_csr_problem));
if (problem == NULL) return NULL;
problem->l = (int) n_indptr[0] - 1;
problem->y = (double *) Y;
problem->x = csr_to_libsvm((double *) values, (int *) indices,
(int *) indptr, problem->l);
/* should be removed once we implement weighted samples */
problem->W = (double *) sample_weight;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class,
char *SV_data, Py_ssize_t *SV_indices_dims,
char *SV_indices, Py_ssize_t *SV_indptr_dims,
char *SV_intptr,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
{
struct svm_csr_model *model;
double *dsv_coef = (double *) sv_coef;
int i, m;
m = nr_class * (nr_class-1)/2;
if ((model = malloc(sizeof(struct svm_csr_model))) == NULL)
goto model_error;
if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
goto nsv_error;
if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
goto label_error;
if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
goto sv_coef_error;
if ((model->rho = malloc( m * sizeof(double))) == NULL)
goto rho_error;
// This is only allocated in dynamic memory while training.
model->n_iter = NULL;
/* in the case of precomputed kernels we do not use
dense_to_precomputed because we don't want the leading 0. As
indices start at 1 (not at 0) this will work */
model->l = (int) SV_indptr_dims[0] - 1;
model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices,
(int *) SV_intptr, model->l);
model->nr_class = nr_class;
model->param = *param;
/*
 * regression and one-class do not use nSV, label.
*/
if (param->svm_type < 2) {
memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
for(i=0; i < model->nr_class; i++)
model->label[i] = i;
}
for (i=0; i < model->nr_class-1; i++) {
/*
 * We cannot squash all these mallocs in a single call since
* svm_destroy_model will free each element of the array.
*/
if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) {
int j;
for (j=0; j<i; j++)
free(model->sv_coef[j]);
goto sv_coef_i_error;
}
memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double));
dsv_coef += model->l;
}
for (i=0; i<m; ++i) {
(model->rho)[i] = -((double *) rho)[i];
}
/*
* just to avoid segfaults, these features are not wrapped but
* svm_destroy_model will try to free them.
*/
if (param->probability) {
if ((model->probA = malloc(m * sizeof(double))) == NULL)
goto probA_error;
memcpy(model->probA, probA, m * sizeof(double));
if ((model->probB = malloc(m * sizeof(double))) == NULL)
goto probB_error;
memcpy(model->probB, probB, m * sizeof(double));
} else {
model->probA = NULL;
model->probB = NULL;
}
/* We'll free SV ourselves */
model->free_sv = 0;
return model;
probB_error:
free(model->probA);
probA_error:
for (i=0; i < model->nr_class-1; i++)
free(model->sv_coef[i]);
sv_coef_i_error:
free(model->rho);
rho_error:
free(model->sv_coef);
sv_coef_error:
free(model->label);
label_error:
free(model->nSV);
nsv_error:
free(model);
model_error:
return NULL;
}
/*
* Copy support vectors into a scipy.sparse.csr matrix
*/
int csr_copy_SV (char *data, Py_ssize_t *n_indices,
char *indices, Py_ssize_t *n_indptr, char *indptr,
struct svm_csr_model *model, int n_features)
{
int i, j, k=0, index;
double *dvalues = (double *) data;
int *iindices = (int *) indices;
int *iindptr = (int *) indptr;
iindptr[0] = 0;
for (i=0; i<model->l; ++i) { /* iterate over support vectors */
index = model->SV[i][0].index;
for(j=0; index >=0 ; ++j) {
iindices[k] = index - 1;
dvalues[k] = model->SV[i][j].value;
index = model->SV[i][j+1].index;
++k;
}
iindptr[i+1] = k;
}
return 0;
}
/* get number of nonzero coefficients in support vectors */
Py_ssize_t get_nonzero_SV (struct svm_csr_model *model) {
int i, j;
Py_ssize_t count=0;
for (i=0; i<model->l; ++i) {
j = 0;
while (model->SV[i][j].index != -1) {
++j;
++count;
}
}
return count;
}
/*
* Predict using a model, where data is expected to be encoded into a csr matrix.
*/
int csr_copy_predict (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
double *t = (double *) dec_values;
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
*t = svm_csr_predict(model, predict_nodes[i], blas_functions);
free(predict_nodes[i]);
++t;
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_values (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, int nr_class, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_values(model, predict_nodes[i],
((double *) dec_values) + i*nr_class,
blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_proba (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
int m = model->nr_class;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_probability(
model, predict_nodes[i], ((double *) dec_values) + i*m, blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
Py_ssize_t get_nr(struct svm_csr_model *model)
{
return (Py_ssize_t) model->nr_class;
}
void copy_intercept(char *data, struct svm_csr_model *model, Py_ssize_t *dims)
{
/* intercept = -rho */
Py_ssize_t i, n = dims[0];
double t, *ddata = (double *) data;
for (i=0; i<n; ++i) {
t = model->rho[i];
/* we do this to avoid ugly -0.0 */
*ddata = (t != 0) ? -t : 0;
++ddata;
}
}
void copy_support (char *data, struct svm_csr_model *model)
{
memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
 * Some helpers to convert from libsvm sparse data structures.
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_csr_model *model)
{
int i, len = model->nr_class-1;
double *temp = (double *) data;
for(i=0; i<len; ++i) {
memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
temp += model->l;
}
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_csr_model *model)
{
const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
* Get the number of support vectors in a model.
*/
Py_ssize_t get_l(struct svm_csr_model *model)
{
return (Py_ssize_t) model->l;
}
void copy_nSV(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
/*
* same as above with model->label
* TODO: merge in the cython layer
*/
void copy_label(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->label, model->nr_class * sizeof(int));
}
void copy_probA(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probA, dims[0] * sizeof(double));
}
void copy_probB(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
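/*
 * A plausible teardown sequence (an assumption drawn from the comments in
 * the routines below, not a verified call graph):
 *
 *   free_model_SV(model);  // releases the SV[i] and sv_coef[i] buffers
 *   free_model(model);     // releases the arrays and the struct itself
 *   free_param(param);
 */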
int free_problem(struct svm_csr_problem *problem)
{
int i;
if (problem == NULL) return -1;
for (i=0; i<problem->l; ++i)
free (problem->x[i]);
free (problem->x);
free (problem);
return 0;
}
int free_model(struct svm_csr_model *model)
{
/* like svm_free_and_destroy_model, but does not free sv_coef[i] */
/* We don't free n_iter, since we did not create them in set_model. */
if (model == NULL) return -1;
free(model->SV);
free(model->sv_coef);
free(model->rho);
free(model->label);
free(model->probA);
free(model->probB);
free(model->nSV);
free(model);
return 0;
}
int free_param(struct svm_parameter *param)
{
if (param == NULL) return -1;
free(param);
return 0;
}
int free_model_SV(struct svm_csr_model *model)
{
int i;
for (i=model->l-1; i>=0; --i) free(model->SV[i]);
	/* svm_destroy_model frees model->SV */
	for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]);
	/* svm_destroy_model frees model->sv_coef */
return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s,stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
svm_set_print_string_function(&print_string_stdout);
else
svm_set_print_string_function(&print_null);
}
@@ -0,0 +1,8 @@
/* This is a hack to generate libsvm with both sparse and dense
   methods in the same binary */
#define _DENSE_REP
#include "svm.cpp"
#undef _DENSE_REP
#include "svm.cpp"
File diff suppressed because it is too large
@@ -0,0 +1,176 @@
#ifndef _LIBSVM_H
#define _LIBSVM_H
#define LIBSVM_VERSION 310
#ifdef __cplusplus
extern "C" {
#endif
#include "_svm_cython_blas_helpers.h"
struct svm_node
{
int dim;
int ind; /* index. A bit redundant, but needed if using a
precomputed kernel */
double *values;
};
struct svm_problem
{
int l;
double *y;
struct svm_node *x;
double *W; /* instance weights */
};
struct svm_csr_node
{
int index;
double value;
};
struct svm_csr_problem
{
int l;
double *y;
struct svm_csr_node **x;
double *W; /* instance weights */
};
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
struct svm_parameter
{
int svm_type;
int kernel_type;
int degree; /* for poly */
double gamma; /* for poly/rbf/sigmoid */
double coef0; /* for poly/sigmoid */
/* these are for training only */
double cache_size; /* in MB */
double eps; /* stopping criteria */
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
int nr_weight; /* for C_SVC */
int *weight_label; /* for C_SVC */
double* weight; /* for C_SVC */
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
double p; /* for EPSILON_SVR */
int shrinking; /* use the shrinking heuristics */
int probability; /* do probability estimates */
int max_iter; /* ceiling on Solver runtime */
int random_seed; /* seed for random number generator */
};
//
// svm_model
//
struct svm_model
{
struct svm_parameter param; /* parameter */
int nr_class; /* number of classes, = 2 in regression/one class svm */
int l; /* total #SV */
struct svm_node *SV; /* SVs (SV[l]) */
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
int *n_iter; /* number of iterations run by the optimization routine to fit the model */
int *sv_ind; /* index of support vectors */
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
double *probA; /* pairwise probability information */
double *probB;
/* for classification only */
int *label; /* label of each class (label[k]) */
int *nSV; /* number of SVs for each class (nSV[k]) */
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
/* XXX */
int free_sv; /* 1 if svm_model is created by svm_load_model*/
/* 0 if svm_model is created by svm_train */
};
struct svm_csr_model
{
struct svm_parameter param; /* parameter */
int nr_class; /* number of classes, = 2 in regression/one class svm */
int l; /* total #SV */
struct svm_csr_node **SV; /* SVs (SV[l]) */
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
int *n_iter; /* number of iterations run by the optimization routine to fit the model */
int *sv_ind; /* index of support vectors */
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
double *probA; /* pairwise probability information */
double *probB;
/* for classification only */
int *label; /* label of each class (label[k]) */
int *nSV; /* number of SVs for each class (nSV[k]) */
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
/* XXX */
int free_sv; /* 1 if svm_model is created by svm_load_model*/
/* 0 if svm_model is created by svm_train */
};
/* svm_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);
int svm_save_model(const char *model_file_name, const struct svm_model *model);
struct svm_model *svm_load_model(const char *model_file_name);
int svm_get_svm_type(const struct svm_model *model);
int svm_get_nr_class(const struct svm_model *model);
void svm_get_labels(const struct svm_model *model, int *label);
double svm_get_svr_probability(const struct svm_model *model);
double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_predict(const struct svm_model *model, const struct svm_node *x, BlasFunctions *blas_functions);
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates, BlasFunctions *blas_functions);
void svm_free_model_content(struct svm_model *model_ptr);
void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
void svm_destroy_param(struct svm_parameter *param);
const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
void svm_set_print_string_function(void (*print_func)(const char *));
/* sparse version */
/* svm_csr_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */
struct svm_csr_model *svm_csr_train(const struct svm_csr_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_csr_cross_validation(const struct svm_csr_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);
int svm_csr_get_svm_type(const struct svm_csr_model *model);
int svm_csr_get_nr_class(const struct svm_csr_model *model);
void svm_csr_get_labels(const struct svm_csr_model *model, int *label);
double svm_csr_get_svr_probability(const struct svm_csr_model *model);
double svm_csr_predict_values(const struct svm_csr_model *model, const struct svm_csr_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_csr_predict(const struct svm_csr_model *model, const struct svm_csr_node *x, BlasFunctions *blas_functions);
double svm_csr_predict_probability(const struct svm_csr_model *model, const struct svm_csr_node *x, double* prob_estimates, BlasFunctions *blas_functions);
void svm_csr_free_model_content(struct svm_csr_model *model_ptr);
void svm_csr_free_and_destroy_model(struct svm_csr_model **model_ptr_ptr);
void svm_csr_destroy_param(struct svm_parameter *param);
const char *svm_csr_check_parameter(const struct svm_csr_problem *prob, const struct svm_parameter *param);
/* end sparse version */
#ifdef __cplusplus
}
#endif
#endif /* _LIBSVM_H */
@@ -0,0 +1,59 @@
/*
Creation, 2020:
- New random number generator using a Mersenne Twister + tweaked Lemire
postprocessor. This fixed a convergence issue on Windows targets for
libsvm and liblinear.
Sylvain Marie, Schneider Electric
See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
*/
#ifndef _NEWRAND_H
#define _NEWRAND_H
#ifdef __cplusplus
#include <random>  // needed for cython to generate a .cpp file from newrand.h
#include <cstdint> // for the fixed-width uint32_t / uint64_t used below
extern "C" {
#endif
// Scikit-Learn-specific random number generator replacing `rand()` originally
// used in LibSVM / LibLinear, to ensure the same behaviour on Windows and
// Linux, with increased speed
// - (1) Init a `mt_rand` object
std::mt19937 mt_rand(std::mt19937::default_seed);
// - (2) public `set_seed()` function that should be used instead of `srand()` to set a new seed.
void set_seed(unsigned custom_seed) {
mt_rand.seed(custom_seed);
}
// - (3) New internal `bounded_rand_int` function, used instead of rand() everywhere.
inline uint32_t bounded_rand_int(uint32_t range) {
// "LibSVM / LibLinear Original way" - make a 31bit positive
// random number and use modulo to make it fit in the range
// return abs( (int)mt_rand()) % range;
// "Better way": tweaked Lemire post-processor
// from http://www.pcg-random.org/posts/bounded-rands.html
uint32_t x = mt_rand();
uint64_t m = uint64_t(x) * uint64_t(range);
uint32_t l = uint32_t(m);
if (l < range) {
uint32_t t = -range;
if (t >= range) {
t -= range;
if (t >= range)
t %= range;
}
while (l < t) {
x = mt_rand();
m = uint64_t(x) * uint64_t(range);
l = uint32_t(m);
}
}
return m >> 32;
}
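// Usage sketch (illustrative): seed once, then draw bounded ints.
//   set_seed(42);
//   uint32_t r = bounded_rand_int(100);  // uniform over [0, 100)
// The rejection loop above discards the few draws of x that would bias
// the low end of the range, so the result is exactly uniform.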
#ifdef __cplusplus
}
#endif
#endif /* _NEWRAND_H */
@@ -0,0 +1,142 @@
import numpy as np
import pytest
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.svm._bounds import l1_min_c
from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap
from sklearn.utils.fixes import CSR_CONTAINERS
dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]]
Y1 = [0, 1, 1, 1]
Y2 = [2, 1, 0, 0]
@pytest.mark.parametrize("X_container", CSR_CONTAINERS + [np.array])
@pytest.mark.parametrize("loss", ["squared_hinge", "log"])
@pytest.mark.parametrize("Y_label", ["two-classes", "multi-class"])
@pytest.mark.parametrize("intercept_label", ["no-intercept", "fit-intercept"])
def test_l1_min_c(X_container, loss, Y_label, intercept_label):
Ys = {"two-classes": Y1, "multi-class": Y2}
intercepts = {
"no-intercept": {"fit_intercept": False},
"fit-intercept": {"fit_intercept": True, "intercept_scaling": 10},
}
X = X_container(dense_X)
Y = Ys[Y_label]
intercept_params = intercepts[intercept_label]
check_l1_min_c(X, Y, loss, **intercept_params)
def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=1.0):
min_c = l1_min_c(
X,
y,
loss=loss,
fit_intercept=fit_intercept,
intercept_scaling=intercept_scaling,
)
clf = {
"log": LogisticRegression(penalty="l1", solver="liblinear"),
"squared_hinge": LinearSVC(loss="squared_hinge", penalty="l1", dual=False),
}[loss]
clf.fit_intercept = fit_intercept
clf.intercept_scaling = intercept_scaling
clf.C = min_c
clf.fit(X, y)
assert (np.asarray(clf.coef_) == 0).all()
assert (np.asarray(clf.intercept_) == 0).all()
clf.C = min_c * 1.01
clf.fit(X, y)
assert (np.asarray(clf.coef_) != 0).any() or (np.asarray(clf.intercept_) != 0).any()
def test_ill_posed_min_c():
X = [[0, 0], [0, 0]]
y = [0, 1]
with pytest.raises(ValueError):
l1_min_c(X, y)
_MAX_UNSIGNED_INT = 4294967295
def test_newrand_default():
"""Test that bounded_rand_int_wrap without seeding respects the range
Note this test should pass either if executed alone, or in conjunctions
with other tests that call set_seed explicit in any order: it checks
invariants on the RNG instead of specific values.
"""
generated = [bounded_rand_int_wrap(100) for _ in range(10)]
assert all(0 <= x < 100 for x in generated)
assert not all(x == generated[0] for x in generated)
@pytest.mark.parametrize("seed, expected", [(0, 54), (_MAX_UNSIGNED_INT, 9)])
def test_newrand_set_seed(seed, expected):
"""Test that `set_seed` produces deterministic results"""
set_seed_wrap(seed)
generated = bounded_rand_int_wrap(100)
assert generated == expected
@pytest.mark.parametrize("seed", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_set_seed_overflow(seed):
"""Test that `set_seed_wrap` is defined for unsigned 32bits ints"""
with pytest.raises(OverflowError):
set_seed_wrap(seed)
@pytest.mark.parametrize("range_, n_pts", [(_MAX_UNSIGNED_INT, 10000), (100, 25)])
def test_newrand_bounded_rand_int(range_, n_pts):
"""Test that `bounded_rand_int` follows a uniform distribution"""
# XXX: this test is very seed sensitive: either it is wrong (too strict?)
# or the wrapped RNG is not uniform enough, at least on some platforms.
set_seed_wrap(42)
n_iter = 100
ks_pvals = []
uniform_dist = stats.uniform(loc=0, scale=range_)
    # perform multiple samplings to make the chance of outlier sampling negligible
for _ in range(n_iter):
# Deterministic random sampling
sample = [bounded_rand_int_wrap(range_) for _ in range(n_pts)]
res = stats.kstest(sample, uniform_dist.cdf)
ks_pvals.append(res.pvalue)
    # Null hypothesis = samples come from a uniform distribution.
# Under the null hypothesis, p-values should be uniformly distributed
# and not concentrated on low values
# (this may seem counter-intuitive but is backed by multiple refs)
# So we can do two checks:
# (1) check uniformity of p-values
uniform_p_vals_dist = stats.uniform(loc=0, scale=1)
res_pvals = stats.kstest(ks_pvals, uniform_p_vals_dist.cdf)
assert res_pvals.pvalue > 0.05, (
"Null hypothesis rejected: generated random numbers are not uniform."
" Details: the (meta) p-value of the test of uniform distribution"
f" of p-values is {res_pvals.pvalue} which is not > 0.05"
)
# (2) (safety belt) check that 90% of p-values are above 0.05
min_10pct_pval = np.percentile(ks_pvals, q=10)
# lower 10th quantile pvalue <= 0.05 means that the test rejects the
# null hypothesis that the sample came from the uniform distribution
assert min_10pct_pval > 0.05, (
"Null hypothesis rejected: generated random numbers are not uniform. "
f"Details: lower 10th quantile p-value of {min_10pct_pval} not > 0.05."
)
@pytest.mark.parametrize("range_", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_bounded_rand_int_limits(range_):
"""Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints"""
with pytest.raises(OverflowError):
bounded_rand_int_wrap(range_)
@@ -0,0 +1,493 @@
import numpy as np
import pytest
from scipy import sparse
from sklearn import base, datasets, linear_model, svm
from sklearn.datasets import load_digits, make_blobs, make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.svm.tests import test_svm
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
skip_if_32bit,
)
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils.fixes import (
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
# test sample 1
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])
true_result = [1, 2, 2]
# test sample 2
X2 = np.array(
[
[0, 0, 0],
[1, 1, 1],
[2, 0, 0],
[0, 0, 2],
[3, 3, 3],
]
)
Y2 = [1, 2, 2, 2, 3]
T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
true_result2 = [1, 2, 3]
iris = datasets.load_iris()
rng = np.random.RandomState(0)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
def check_svm_model_equal(dense_svm, X_train, y_train, X_test):
    # Use the original svm model for the dense fit and clone an identical
    # svm model for the sparse fit
sparse_svm = base.clone(dense_svm)
dense_svm.fit(X_train.toarray(), y_train)
if sparse.issparse(X_test):
X_test_dense = X_test.toarray()
else:
X_test_dense = X_test
sparse_svm.fit(X_train, y_train)
assert sparse.issparse(sparse_svm.support_vectors_)
assert sparse.issparse(sparse_svm.dual_coef_)
assert_allclose(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray())
assert_allclose(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray())
if dense_svm.kernel == "linear":
assert sparse.issparse(sparse_svm.coef_)
assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
assert_allclose(dense_svm.support_, sparse_svm.support_)
assert_allclose(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test))
assert_array_almost_equal(
dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)
)
assert_array_almost_equal(
dense_svm.decision_function(X_test_dense),
sparse_svm.decision_function(X_test_dense),
)
if isinstance(dense_svm, svm.OneClassSVM):
msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
else:
assert_array_almost_equal(
dense_svm.predict_proba(X_test_dense),
sparse_svm.predict_proba(X_test),
decimal=4,
)
msg = "cannot use sparse input in 'SVC' trained on dense data"
if sparse.issparse(X_test):
with pytest.raises(ValueError, match=msg):
dense_svm.predict(X_test)
@skip_if_32bit
@pytest.mark.parametrize(
"X_train, y_train, X_test",
[
[X, Y, T],
[X2, Y2, T2],
[X_blobs[:80], y_blobs[:80], X_blobs[80:]],
[iris.data, iris.target, iris.data],
],
)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS)
def test_svc(X_train, y_train, X_test, kernel, sparse_container):
"""Check that sparse SVC gives the same result as SVC."""
X_train = sparse_container(X_train)
clf = svm.SVC(
gamma=1,
kernel=kernel,
probability=True,
random_state=0,
decision_function_shape="ovo",
)
check_svm_model_equal(clf, X_train, y_train, X_test)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_unsorted_indices(csr_container):
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits, as iris, blobs and make_classification didn't
    # expose the problem
X, y = load_digits(return_X_y=True)
X_test = csr_container(X[50:100])
X, y = X[:50], y[:50]
X_sparse = csr_container(X)
coef_dense = (
svm.SVC(kernel="linear", probability=True, random_state=0).fit(X, y).coef_
)
sparse_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit(
X_sparse, y
)
coef_sorted = sparse_svc.coef_
# make sure dense and sparse SVM give the same result
assert_allclose(coef_dense, coef_sorted.toarray())
# reverse each row's indices
def scramble_indices(X):
new_data = []
new_indices = []
for i in range(1, len(X.indptr)):
row_slice = slice(*X.indptr[i - 1 : i + 1])
new_data.extend(X.data[row_slice][::-1])
new_indices.extend(X.indices[row_slice][::-1])
return csr_container((new_data, new_indices, X.indptr), shape=X.shape)
X_sparse_unsorted = scramble_indices(X_sparse)
X_test_unsorted = scramble_indices(X_test)
assert not X_sparse_unsorted.has_sorted_indices
assert not X_test_unsorted.has_sorted_indices
unsorted_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit(
X_sparse_unsorted, y
)
coef_unsorted = unsorted_svc.coef_
# make sure unsorted indices give same result
assert_allclose(coef_unsorted.toarray(), coef_sorted.toarray())
assert_allclose(
sparse_svc.predict_proba(X_test_unsorted), sparse_svc.predict_proba(X_test)
)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_svc_with_custom_kernel(lil_container):
def kfunc(x, y):
return safe_sparse_dot(x, y.T)
X_sp = lil_container(X)
clf_lin = svm.SVC(kernel="linear").fit(X_sp, Y)
clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y)
assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
@skip_if_32bit
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"])
def test_svc_iris(csr_container, kernel):
# Test the sparse SVC with the iris dataset
iris_data_sp = csr_container(iris.data)
sp_clf = svm.SVC(kernel=kernel).fit(iris_data_sp, iris.target)
clf = svm.SVC(kernel=kernel).fit(iris.data, iris.target)
assert_allclose(clf.support_vectors_, sp_clf.support_vectors_.toarray())
assert_allclose(clf.dual_coef_, sp_clf.dual_coef_.toarray())
assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp))
if kernel == "linear":
assert_allclose(clf.coef_, sp_clf.coef_.toarray())
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse_decision_function(csr_container):
# Test decision_function
    # Sanity check: test that the decision_function implemented in Python
    # returns the same values as the one in libsvm
# multi class:
iris_data_sp = csr_container(iris.data)
svc = svm.SVC(kernel="linear", C=0.1, decision_function_shape="ovo")
clf = svc.fit(iris_data_sp, iris.target)
dec = safe_sparse_dot(iris_data_sp, clf.coef_.T) + clf.intercept_
assert_allclose(dec, clf.decision_function(iris_data_sp))
# binary:
clf.fit(X, Y)
dec = np.dot(X, clf.coef_.T) + clf.intercept_
prediction = clf.predict(X)
assert_allclose(dec.ravel(), clf.decision_function(X))
assert_allclose(
prediction, clf.classes_[(clf.decision_function(X) > 0).astype(int).ravel()]
)
expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0])
assert_array_almost_equal(clf.decision_function(X), expected, decimal=2)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_error(lil_container):
# Test that it gives proper exception on deficient input
clf = svm.SVC()
X_sp = lil_container(X)
Y2 = Y[:-1] # wrong dimensions for labels
with pytest.raises(ValueError):
clf.fit(X_sp, Y2)
clf.fit(X_sp, Y)
assert_array_equal(clf.predict(T), true_result)
@pytest.mark.parametrize(
"lil_container, dok_container", zip(LIL_CONTAINERS, DOK_CONTAINERS)
)
def test_linearsvc(lil_container, dok_container):
# Similar to test_SVC
X_sp = lil_container(X)
X2_sp = dok_container(X2)
clf = svm.LinearSVC(random_state=0).fit(X, Y)
sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y)
assert sp_clf.fit_intercept
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)
assert_allclose(clf.predict(X), sp_clf.predict(X_sp))
clf.fit(X2, Y2)
sp_clf.fit(X2_sp, Y2)
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_linearsvc_iris(csr_container):
# Test the sparse LinearSVC with the iris dataset
iris_data_sp = csr_container(iris.data)
sp_clf = svm.LinearSVC(random_state=0).fit(iris_data_sp, iris.target)
clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
assert clf.fit_intercept == sp_clf.fit_intercept
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1)
assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp))
# check decision_function
pred = np.argmax(sp_clf.decision_function(iris_data_sp), axis=1)
assert_allclose(pred, clf.predict(iris.data))
# sparsify the coefficients on both models and check that they still
# produce the same results
clf.sparsify()
assert_array_equal(pred, clf.predict(iris_data_sp))
sp_clf.sparsify()
assert_array_equal(pred, sp_clf.predict(iris_data_sp))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_weight(csr_container):
# Test class weights
X_, y_ = make_classification(
n_samples=200, n_features=100, weights=[0.833, 0.167], random_state=0
)
X_ = csr_container(X_)
for clf in (
linear_model.LogisticRegression(),
svm.LinearSVC(random_state=0),
svm.SVC(),
):
clf.set_params(class_weight={0: 5})
clf.fit(X_[:180], y_[:180])
y_pred = clf.predict(X_[180:])
assert np.sum(y_pred == y_[180:]) >= 11
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_sample_weights(lil_container):
# Test weights on individual samples
X_sp = lil_container(X)
clf = svm.SVC()
clf.fit(X_sp, Y)
assert_array_equal(clf.predict([X[2]]), [1.0])
sample_weight = [0.1] * 3 + [10] * 3
clf.fit(X_sp, Y, sample_weight=sample_weight)
assert_array_equal(clf.predict([X[2]]), [2.0])
def test_sparse_liblinear_intercept_handling():
# Test that sparse liblinear honours intercept_scaling param
test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC)
@pytest.mark.parametrize(
"X_train, y_train, X_test",
[
[X, None, T],
[X2, None, T2],
[X_blobs[:80], None, X_blobs[80:]],
[iris.data, None, iris.data],
],
)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS)
@skip_if_32bit
def test_sparse_oneclasssvm(X_train, y_train, X_test, kernel, sparse_container):
# Check that sparse OneClassSVM gives the same result as dense OneClassSVM
X_train = sparse_container(X_train)
clf = svm.OneClassSVM(gamma=1, kernel=kernel)
check_svm_model_equal(clf, X_train, y_train, X_test)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse_realdata(csr_container):
# Test on a subset from the 20newsgroups dataset.
# This catches some bugs if input is not correctly converted into
# sparse format or weights are not correctly initialized.
data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
# SVC does not support large sparse, so we specify int32 indices
# In this case, `csr_matrix` automatically uses int32 regardless of the dtypes of
# `indices` and `indptr` but `csr_array` may or may not use the same dtype as
# `indices` and `indptr`, which would be int64 if not specified
indices = np.array([6, 5, 35, 31], dtype=np.int32)
indptr = np.array([0] * 8 + [1] * 32 + [2] * 38 + [4] * 3, dtype=np.int32)
X = csr_container((data, indices, indptr))
    y = np.array(
        [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0,
         3.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0,
         2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0,
         2.0, 2.0, 3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0,
         2.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0]
    )
clf = svm.SVC(kernel="linear").fit(X.toarray(), y)
sp_clf = svm.SVC(kernel="linear").fit(X.tocoo(), y)
assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_sparse_svc_clone_with_callable_kernel(lil_container):
# Test that the "dense_fit" is called even though we use sparse input
# meaning that everything works fine.
a = svm.SVC(C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0)
b = base.clone(a)
X_sp = lil_container(X)
b.fit(X_sp, Y)
pred = b.predict(X_sp)
b.predict_proba(X_sp)
dense_svm = svm.SVC(
C=1, kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0
)
pred_dense = dense_svm.fit(X, Y).predict(X)
assert_array_equal(pred_dense, pred)
# b.decision_function(X_sp) # XXX : should be supported
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_timeout(lil_container):
sp = svm.SVC(
C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0, max_iter=1
)
warning_msg = (
r"Solver terminated early \(max_iter=1\). Consider pre-processing "
r"your data with StandardScaler or MinMaxScaler."
)
with pytest.warns(ConvergenceWarning, match=warning_msg):
sp.fit(lil_container(X), Y)
def test_consistent_proba():
a = svm.SVC(probability=True, max_iter=1, random_state=0)
with ignore_warnings(category=ConvergenceWarning):
proba_1 = a.fit(X, Y).predict_proba(X)
a = svm.SVC(probability=True, max_iter=1, random_state=0)
with ignore_warnings(category=ConvergenceWarning):
proba_2 = a.fit(X, Y).predict_proba(X)
assert_allclose(proba_1, proba_2)
File diff suppressed because it is too large