feat: initial commit - Phase 1 & 2 core features

Author: hiderfong
Date: 2026-04-22 17:07:33 +08:00
Commit: 1773bda06b
25005 changed files with 6252106 additions and 0 deletions
@@ -0,0 +1,25 @@
"""
The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
"""
# See http://scikit-learn.sourceforge.net/modules/svm.html for complete
# documentation.
# Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
# the scikit-learn community. LibSVM and LibLinear are copyright
# of their respective owners.
# License: BSD 3 clause (C) INRIA 2010
from ._bounds import l1_min_c
from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM
__all__ = [
"LinearSVC",
"LinearSVR",
"NuSVC",
"NuSVR",
"OneClassSVM",
"SVC",
"SVR",
"l1_min_c",
]
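For orientation only (a sketch, not part of the file above): the names re-exported by this __init__.py form the public surface of sklearn.svm. The toy data below is an illustrative assumption.

# Illustrative usage of the re-exported estimators (assumed toy data).
import numpy as np
from sklearn.svm import SVC

X = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0], [1.0, 0.0]])
y = np.array([0, 1, 1, 0])
clf = SVC(kernel="rbf", C=1.0).fit(X, y)
print(clf.predict([[0.9, 0.8]]))  # predicted class for a new point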
File diff suppressed because it is too large.
@@ -0,0 +1,95 @@
"""Determination of parameter bounds"""
# Author: Paolo Losi
# License: BSD 3 clause
from numbers import Real
import numpy as np
from ..preprocessing import LabelBinarizer
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_array, check_consistent_length
@validate_params(
{
"X": ["array-like", "sparse matrix"],
"y": ["array-like"],
"loss": [StrOptions({"squared_hinge", "log"})],
"fit_intercept": ["boolean"],
"intercept_scaling": [Interval(Real, 0, None, closed="neither")],
},
prefer_skip_nested_validation=True,
)
def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
"""Return the lowest bound for C.
The lower bound for C is computed such that for C in (l1_min_C, infinity)
the model is guaranteed not to be empty. This applies to l1 penalized
classifiers, such as LinearSVC with penalty='l1' and
linear_model.LogisticRegression with penalty='l1'.
This value is valid if the class_weight parameter in fit() is not set.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the number of features.
y : array-like of shape (n_samples,)
Target vector relative to X.
loss : {'squared_hinge', 'log'}, default='squared_hinge'
Specifies the loss function.
With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
With 'log' it is the loss of logistic regression models.
fit_intercept : bool, default=True
Specifies if the intercept should be fitted by the model.
It must match the fit() method parameter.
intercept_scaling : float, default=1.0
When fit_intercept is True, instance vector x becomes
[x, intercept_scaling],
i.e. a "synthetic" feature with constant value equal to
intercept_scaling is appended to the instance vector.
It must match the fit() method parameter.
Returns
-------
l1_min_c : float
Minimum value for C.
Examples
--------
>>> from sklearn.svm import l1_min_c
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=20, random_state=42)
>>> print(f"{l1_min_c(X, y, loss='squared_hinge', fit_intercept=True):.4f}")
0.0044
"""
X = check_array(X, accept_sparse="csc")
check_consistent_length(X, y)
Y = LabelBinarizer(neg_label=-1).fit_transform(y).T
# maximum absolute value over classes and features
den = np.max(np.abs(safe_sparse_dot(Y, X)))
if fit_intercept:
bias = np.full(
(np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype
)
den = max(den, abs(np.dot(Y, bias)).max())
if den == 0.0:
raise ValueError(
"Ill-posed l1_min_c calculation: l1 will always "
"select zero coefficients for this data"
)
if loss == "squared_hinge":
return 0.5 / den
else: # loss == 'log':
return 2.0 / den
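A common use of l1_min_c, sketched here for orientation (not part of the file): seed a logarithmic grid of C values for an l1-regularized path. The grid size is an arbitrary choice.

# Hedged sketch: C values below l1_min_c give all-zero coefficients.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.svm import l1_min_c

X, y = make_classification(n_samples=100, n_features=20, random_state=42)
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 4, 8)
for C in cs:
    clf = LogisticRegression(penalty="l1", solver="liblinear", C=C).fit(X, y)
    print(f"C={C:.4f}: {np.count_nonzero(clf.coef_)} nonzero coefficients")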
File diff suppressed because it is too large.
@@ -0,0 +1,43 @@
from ..utils._typedefs cimport intp_t
cdef extern from "_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
ctypedef void (*axpy_func)(int, double, const double*, int, double*, int)
ctypedef void (*scal_func)(int, double, const double*, int)
ctypedef double (*nrm2_func)(int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
axpy_func axpy
scal_func scal
nrm2_func nrm2
cdef extern from "linear.h":
cdef struct feature_node
cdef struct problem
cdef struct model
cdef struct parameter
ctypedef problem* problem_const_ptr "problem const *"
ctypedef parameter* parameter_const_ptr "parameter const *"
ctypedef char* char_const_ptr "char const *"
char_const_ptr check_parameter(problem_const_ptr prob, parameter_const_ptr param)
model *train(problem_const_ptr prob, parameter_const_ptr param, BlasFunctions *blas_functions) nogil
int get_nr_feature (model *model)
int get_nr_class (model *model)
void get_n_iter (model *model, int *n_iter)
void free_and_destroy_model (model **)
void destroy_param (parameter *)
cdef extern from "liblinear_helper.c":
void copy_w(void *, model *, int)
parameter *set_parameter(int, double, double, int, char *, char *, int, int, double)
problem *set_problem (char *, int, int, int, int, double, char *, char *)
problem *csr_set_problem (char *, int, char *, char *, int, int, int, double, char *, char *)
model *set_model(parameter *, char *, intp_t *, char *, double)
double get_bias(model *)
void free_problem (problem *)
void free_parameter (parameter *)
void set_verbosity(int)
@@ -0,0 +1,147 @@
"""
Wrapper for liblinear
Author: fabian.pedregosa@inria.fr
"""
import numpy as np
from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2
from ..utils._typedefs cimport float32_t, float64_t, int32_t
include "_liblinear.pxi"
def train_wrap(
object X,
const float64_t[::1] Y,
bint is_sparse,
int solver_type,
double eps,
double bias,
double C,
const float64_t[:] class_weight,
int max_iter,
unsigned random_seed,
double epsilon,
const float64_t[::1] sample_weight
):
cdef parameter *param
cdef problem *problem
cdef model *model
cdef char_const_ptr error_msg
cdef int len_w
cdef bint X_has_type_float64 = X.dtype == np.float64
cdef char * X_data_bytes_ptr
cdef const float64_t[::1] X_data_64
cdef const float32_t[::1] X_data_32
cdef const int32_t[::1] X_indices
cdef const int32_t[::1] X_indptr
if is_sparse:
X_indices = X.indices
X_indptr = X.indptr
if X_has_type_float64:
X_data_64 = X.data
X_data_bytes_ptr = <char *> &X_data_64[0]
else:
X_data_32 = X.data
X_data_bytes_ptr = <char *> &X_data_32[0]
problem = csr_set_problem(
X_data_bytes_ptr,
X_has_type_float64,
<char *> &X_indices[0],
<char *> &X_indptr[0],
(<int32_t>X.shape[0]),
(<int32_t>X.shape[1]),
(<int32_t>X.nnz),
bias,
<char *> &sample_weight[0],
<char *> &Y[0]
)
else:
X_as_1d_array = X.reshape(-1)
if X_has_type_float64:
X_data_64 = X_as_1d_array
X_data_bytes_ptr = <char *> &X_data_64[0]
else:
X_data_32 = X_as_1d_array
X_data_bytes_ptr = <char *> &X_data_32[0]
problem = set_problem(
X_data_bytes_ptr,
X_has_type_float64,
(<int32_t>X.shape[0]),
(<int32_t>X.shape[1]),
(<int32_t>np.count_nonzero(X)),
bias,
<char *> &sample_weight[0],
<char *> &Y[0]
)
cdef int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.intc)
param = set_parameter(
solver_type,
eps,
C,
class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
epsilon
)
error_msg = check_parameter(problem, param)
if error_msg:
free_problem(problem)
free_parameter(param)
raise ValueError(error_msg)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
blas_functions.axpy = _axpy[double]
blas_functions.scal = _scal[double]
blas_functions.nrm2 = _nrm2[double]
# early return
with nogil:
model = train(problem, param, &blas_functions)
# FREE
free_problem(problem)
free_parameter(param)
# destroy_param(param) don't call this or it will destroy class_weight_label and class_weight
# coef matrix holder created as fortran since that's what's used in liblinear
cdef float64_t[::1, :] w
cdef int nr_class = get_nr_class(model)
cdef int labels_ = nr_class
if nr_class == 2:
labels_ = 1
cdef int32_t[::1] n_iter = np.zeros(labels_, dtype=np.intc)
get_n_iter(model, <int *> &n_iter[0])
cdef int nr_feature = get_nr_feature(model)
if bias > 0:
nr_feature = nr_feature + 1
if nr_class == 2 and solver_type != 4: # solver is not Crammer-Singer
w = np.empty((1, nr_feature), order='F')
copy_w(&w[0, 0], model, nr_feature)
else:
len_w = (nr_class) * nr_feature
w = np.empty((nr_class, nr_feature), order='F')
copy_w(&w[0, 0], model, len_w)
free_and_destroy_model(&model)
return w.base, n_iter.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
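For orientation, a hedged sketch of how this internal entry point might be driven from Python; in scikit-learn the real callers live in sklearn.svm._base, and every value below is an illustrative assumption. Solver code 1 is L2R_L2LOSS_SVC_DUAL in the linear.h enum.

# Illustrative call into the internal binding (all values are assumptions).
import numpy as np
from sklearn.svm import _liblinear

rng = np.random.RandomState(0)
X = np.ascontiguousarray(rng.rand(10, 3))  # dense float64, C-contiguous
Y = np.array([0.0, 1.0] * 5)               # float64 targets
w, n_iter = _liblinear.train_wrap(
    X, Y,
    False,          # is_sparse: dense path
    1,              # solver_type: L2R_L2LOSS_SVC_DUAL (see linear.h)
    1e-4,           # eps
    -1.0,           # bias < 0 disables the bias term
    1.0,            # C
    np.empty(0),    # class_weight: empty means uniform
    1000,           # max_iter
    0,              # random_seed
    0.1,            # epsilon (p), only used by SVR solvers
    np.ones(10),    # sample_weight
)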
@@ -0,0 +1,75 @@
################################################################################
# Includes
from ..utils._typedefs cimport intp_t
cdef extern from "_svm_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
cdef extern from "svm.h":
cdef struct svm_node
cdef struct svm_model
cdef struct svm_parameter:
int svm_type
int kernel_type
int degree # for poly
double gamma # for poly/rbf/sigmoid
double coef0 # for poly/sigmoid
# these are for training only
double cache_size # in MB
double eps # stopping criteria
double C # for C_SVC, EPSILON_SVR and NU_SVR
int nr_weight # for C_SVC
int *weight_label # for C_SVC
double* weight # for C_SVC
double nu # for NU_SVC, ONE_CLASS, and NU_SVR
double p # for EPSILON_SVR
int shrinking # use the shrinking heuristics
int probability # do probability estimates
int max_iter # ceiling on Solver runtime
int random_seed # seed for random generator in probability estimation
cdef struct svm_problem:
int l
double *y
svm_node *x
double *W # instance weights
char *svm_check_parameter(svm_problem *, svm_parameter *)
svm_model *svm_train(svm_problem *, svm_parameter *, int *, BlasFunctions *) nogil
void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
void svm_cross_validation(svm_problem *, svm_parameter *, int nr_fold, double *target, BlasFunctions *) nogil
cdef extern from "libsvm_helper.c":
# this file contains methods for accessing libsvm 'hidden' fields
svm_node **dense_to_sparse (char *, intp_t *)
void set_parameter (svm_parameter *, int , int , int , double, double ,
double , double , double , double,
double, int, int, int, char *, char *, int,
int)
void set_problem (svm_problem *, char *, char *, char *, intp_t *, int)
svm_model *set_model (svm_parameter *, int, char *, intp_t *,
char *, intp_t *, intp_t *, char *,
char *, char *, char *, char *)
void copy_sv_coef (char *, svm_model *)
void copy_n_iter (char *, svm_model *)
void copy_intercept (char *, svm_model *, intp_t *)
void copy_SV (char *, svm_model *, intp_t *)
int copy_support (char *data, svm_model *model)
int copy_predict (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil
int copy_predict_proba (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil
int copy_predict_values(char *, svm_model *, intp_t *, char *, int, BlasFunctions *) nogil
void copy_nSV (char *, svm_model *)
void copy_probA (char *, svm_model *, intp_t *)
void copy_probB (char *, svm_model *, intp_t *)
intp_t get_l (svm_model *)
intp_t get_nr (svm_model *)
int free_problem (svm_problem *)
int free_model (svm_model *)
void set_verbosity(int)
@@ -0,0 +1,917 @@
"""
Binding for libsvm_skl
----------------------
These are the bindings for libsvm_skl, which is a fork of libsvm[1]
that adds to libsvm some capabilities, like index of support vectors
and efficient representation of dense matrices.
These are low-level routines, but can be used for flexibility or
performance reasons. See sklearn.svm for a higher-level API.
Low-level memory management is done in libsvm_helper.c. If we happen
to run out of memory a MemoryError will be raised. In practice this is
not very helpful, since malloc most likely fails inside svm.cpp,
where no memory checks are done.
[1] https://www.csie.ntu.edu.tw/~cjlin/libsvm/
Notes
-----
The signature mode='c' is somewhat superficial, since we already
check that arrays are C-contiguous in svm.py
Authors
-------
2010: Fabian Pedregosa <fabian.pedregosa@inria.fr>
Gael Varoquaux <gael.varoquaux@normalesup.org>
"""
import numpy as np
from libc.stdlib cimport free
from ..utils._cython_blas cimport _dot
from ..utils._typedefs cimport float64_t, int32_t, intp_t
include "_libsvm.pxi"
cdef extern from *:
ctypedef struct svm_parameter:
pass
################################################################################
# Internal variables
LIBSVM_KERNEL_TYPES = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
################################################################################
# Wrapper functions
def fit(
const float64_t[:, ::1] X,
const float64_t[::1] Y,
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
double tol=1e-3,
double C=1.0,
double nu=0.5,
double epsilon=0.1,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
int shrinking=1,
int probability=0,
double cache_size=100.,
int max_iter=-1,
int random_seed=0,
):
"""
Train the model using libsvm (low-level method)
Parameters
----------
X : array-like, dtype=float64 of shape (n_samples, n_features)
Y : array, dtype=float64 of shape (n_samples,)
target vector
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0
Independent parameter in poly/sigmoid kernel.
tol : float64, default=1e-3
Numeric stopping criterion (WRITEME).
C : float64, default=1
C parameter in C-Support Vector Classification.
nu : float64, default=0.5
An upper bound on the fraction of training errors and a lower bound of
the fraction of support vectors. Should be in the interval (0, 1].
epsilon : double, default=0.1
Epsilon parameter in the epsilon-insensitive loss function.
class_weight : array, dtype=float64, shape (n_classes,), \
default=np.empty(0)
Set the parameter C of class i to class_weight[i]*C for
SVC. If not given, all classes are supposed to have
weight one.
sample_weight : array, dtype=float64, shape (n_samples,), \
default=np.empty(0)
Weights assigned to each sample.
shrinking : int, default=1
Whether to use the shrinking heuristic.
probability : int, default=0
Whether to enable probability estimates.
cache_size : float64, default=100
Cache size for gram matrix columns (in megabytes).
max_iter : int (-1 for no limit), default=-1
Stop solver after this many iterations regardless of accuracy
(XXX Currently there is no API to know whether this kicked in.)
random_seed : int, default=0
Seed for the random number generator used for probability estimates.
Returns
-------
support : array of shape (n_support,)
Index of support vectors.
support_vectors : array of shape (n_support, n_features)
Support vectors (equivalent to X[support]). Will return an
empty array in the case of precomputed kernel.
n_class_SV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2,)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates, empty array for probability=False.
n_iter : ndarray of shape (max(1, (n_class * (n_class - 1) // 2)),)
Number of iterations run by the optimization routine to fit the model.
"""
cdef svm_parameter param
cdef svm_problem problem
cdef svm_model *model
cdef const char *error_msg
cdef intp_t SV_len
if len(sample_weight) == 0:
sample_weight = np.ones(X.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == X.shape[0], (
f"sample_weight and X have incompatible shapes: sample_weight has "
f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
)
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_problem(
&problem,
<char*> &X[0, 0],
<char*> &Y[0],
<char*> &sample_weight[0],
<intp_t*> X.shape,
kernel_index,
)
if problem.x == NULL:
raise MemoryError("Seems we've run out of memory")
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
set_parameter(
&param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
epsilon,
shrinking,
probability,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
)
error_msg = svm_check_parameter(&problem, &param)
if error_msg:
# for SVR: epsilon is called p in libsvm
error_repl = error_msg.decode('utf-8').replace("p < 0", "epsilon < 0")
raise ValueError(error_repl)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# this does the real work
cdef int fit_status = 0
with nogil:
model = svm_train(&problem, &param, &fit_status, &blas_functions)
# from here until the end, we just copy the data returned by
# svm_train
SV_len = get_l(model)
n_class = get_nr(model)
cdef int[::1] n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc)
copy_n_iter(<char*> &n_iter[0], model)
cdef float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64)
copy_sv_coef(<char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model)
# the intercept is just model.rho but with sign changed
cdef float64_t[::1] intercept = np.empty(
int((n_class*(n_class-1))/2), dtype=np.float64
)
copy_intercept(<char*> &intercept[0], model, <intp_t*> intercept.shape)
cdef int32_t[::1] support = np.empty(SV_len, dtype=np.int32)
copy_support(<char*> &support[0] if support.size > 0 else NULL, model)
# copy model.SV
cdef float64_t[:, ::1] support_vectors
if kernel_index == 4:
# precomputed kernel
support_vectors = np.empty((0, 0), dtype=np.float64)
else:
support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64)
copy_SV(
<char*> &support_vectors[0, 0] if support_vectors.size > 0 else NULL,
model,
<intp_t*> support_vectors.shape,
)
cdef int32_t[::1] n_class_SV
if svm_type == 0 or svm_type == 1:
n_class_SV = np.empty(n_class, dtype=np.int32)
copy_nSV(<char*> &n_class_SV[0] if n_class_SV.size > 0 else NULL, model)
else:
# OneClass and SVR are considered to have 2 classes
n_class_SV = np.array([SV_len, SV_len], dtype=np.int32)
cdef float64_t[::1] probA
cdef float64_t[::1] probB
if probability != 0:
if svm_type < 2: # SVC and NuSVC
probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64)
probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64)
copy_probB(<char*> &probB[0], model, <intp_t*> probB.shape)
else:
probA = np.empty(1, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
copy_probA(<char*> &probA[0], model, <intp_t*> probA.shape)
else:
probA = np.empty(0, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
svm_free_and_destroy_model(&model)
free(problem.x)
return (
support.base,
support_vectors.base,
n_class_SV.base,
sv_coef.base,
intercept.base,
probA.base,
probB.base,
fit_status,
n_iter.base,
)
cdef void set_predict_params(
svm_parameter *param,
int svm_type,
kernel,
int degree,
double gamma,
double coef0,
double cache_size,
int probability,
int nr_weight,
char *weight_label,
char *weight,
) except *:
"""Fill param with prediction time-only parameters."""
# training-time only parameters
cdef double C = 0.0
cdef double epsilon = 0.1
cdef int max_iter = 0
cdef double nu = 0.5
cdef int shrinking = 0
cdef double tol = 0.1
cdef int random_seed = -1
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_parameter(
param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
epsilon,
shrinking,
probability,
nr_weight,
weight_label,
weight,
max_iter,
random_seed,
)
def predict(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
const float64_t[:, ::1] sv_coef,
const float64_t[::1] intercept,
const float64_t[::1] probA=np.empty(0),
const float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict target values of X given a model (low-level method)
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
support : array of shape (n_support,)
Index of support vectors in training set.
SV : array of shape (n_support, n_features)
Support vectors.
nSV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef int rv
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
0,
<int>class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0] if support.size > 0 else NULL,
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL,
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# TODO: use check_model
try:
dec_values = np.empty(X.shape[0])
with nogil:
rv = copy_predict(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def predict_proba(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
float64_t[:, ::1] sv_coef,
float64_t[::1] intercept,
float64_t[::1] probA=np.empty(0),
float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
float64_t[::1] class_weight=np.empty(0),
float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict probabilities
svm_model stores all parameters needed to predict a given value.
For speed, all real work is done at the C level in function
copy_predict (libsvm_helper.c).
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
See sklearn.svm.predict for a complete list of parameters.
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
support : array of shape (n_support,)
Index of support vectors in training set.
SV : array of shape (n_support, n_features)
Support vectors.
nSV : array of shape (n_class,)
Number of support vectors in each class.
sv_coef : array of shape (n_class-1, n_support)
Coefficients of support vectors in decision function.
intercept : array of shape (n_class*(n_class-1)/2,)
Intercept in decision function.
probA, probB : array of shape (n_class*(n_class-1)/2,)
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
cdef int rv
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
1,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0],
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0],
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
cdef intp_t n_class = get_nr(model)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
with nogil:
rv = copy_predict_proba(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0, 0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def decision_function(
const float64_t[:, ::1] X,
const int32_t[::1] support,
const float64_t[:, ::1] SV,
const int32_t[::1] nSV,
const float64_t[:, ::1] sv_coef,
const float64_t[::1] intercept,
const float64_t[::1] probA=np.empty(0),
const float64_t[::1] probB=np.empty(0),
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
const float64_t[::1] class_weight=np.empty(0),
const float64_t[::1] sample_weight=np.empty(0),
double cache_size=100.0,
):
"""
Predict margin (libsvm name for this is predict_values)
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
Parameters
----------
X : array-like, dtype=float, size=[n_samples, n_features]
support : array, shape=[n_support]
Index of support vectors in training set.
SV : array, shape=[n_support, n_features]
Support vectors.
nSV : array, shape=[n_class]
Number of support vectors in each class.
sv_coef : array, shape=[n_class-1, n_support]
Coefficients of support vectors in decision function.
intercept : array, shape=[n_class*(n_class-1)/2]
Intercept in decision function.
probA, probB : array, shape=[n_class*(n_class-1)/2]
Probability estimates.
svm_type : {0, 1, 2, 3, 4}, optional
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively. 0 by default.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, optional
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed. 'rbf' by default.
degree : int32, optional
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial), 3 by default.
gamma : float64, optional
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels. 0.1 by default.
coef0 : float64, optional
Independent parameter in poly/sigmoid kernel. 0 by default.
Returns
-------
dec_values : array
Predicted values.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter param
cdef svm_model *model
cdef intp_t n_class
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
cdef int rv
set_predict_params(
&param,
svm_type,
kernel,
degree,
gamma,
coef0,
cache_size,
0,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
)
model = set_model(
&param,
<int> nSV.shape[0],
<char*> &SV[0, 0] if SV.size > 0 else NULL,
<intp_t*> SV.shape,
<char*> &support[0],
<intp_t*> support.shape,
<intp_t*> sv_coef.strides,
<char*> &sv_coef[0, 0],
<char*> &intercept[0],
<char*> &nSV[0],
<char*> &probA[0] if probA.size > 0 else NULL,
<char*> &probB[0] if probB.size > 0 else NULL,
)
if svm_type > 1:
n_class = 1
else:
n_class = get_nr(model)
n_class = n_class * (n_class - 1) // 2
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
with nogil:
rv = copy_predict_values(
<char*> &X[0, 0],
model,
<intp_t*> X.shape,
<char*> &dec_values[0, 0],
n_class,
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
finally:
free_model(model)
return dec_values.base
def cross_validation(
const float64_t[:, ::1] X,
const float64_t[::1] Y,
int n_fold,
int svm_type=0,
kernel='rbf',
int degree=3,
double gamma=0.1,
double coef0=0.0,
double tol=1e-3,
double C=1.0,
double nu=0.5,
double epsilon=0.1,
float64_t[::1] class_weight=np.empty(0),
float64_t[::1] sample_weight=np.empty(0),
int shrinking=0,
int probability=0,
double cache_size=100.0,
int max_iter=-1,
int random_seed=0,
):
"""
Binding of the cross-validation routine (low-level routine)
Parameters
----------
X : array-like, dtype=float of shape (n_samples, n_features)
Y : array, dtype=float of shape (n_samples,)
target vector
n_fold : int32
Number of folds for cross validation.
svm_type : {0, 1, 2, 3, 4}, default=0
Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
respectively.
kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default='rbf'
Kernel to use in the model: linear, polynomial, RBF, sigmoid
or precomputed.
degree : int32, default=3
Degree of the polynomial kernel (only relevant if kernel is
set to polynomial).
gamma : float64, default=0.1
Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
kernels.
coef0 : float64, default=0.0
Independent parameter in poly/sigmoid kernel.
tol : float64, default=1e-3
Numeric stopping criterion (WRITEME).
C : float64, default=1
C parameter in C-Support Vector Classification.
nu : float64, default=0.5
An upper bound on the fraction of training errors and a lower bound of
the fraction of support vectors. Should be in the interval (0, 1].
epsilon : double, default=0.1
Epsilon parameter in the epsilon-insensitive loss function.
class_weight : array, dtype=float64, shape (n_classes,), \
default=np.empty(0)
Set the parameter C of class i to class_weight[i]*C for
SVC. If not given, all classes are supposed to have
weight one.
sample_weight : array, dtype=float64, shape (n_samples,), \
default=np.empty(0)
Weights assigned to each sample.
shrinking : int, default=1
Whether to use the shrinking heuristic.
probability : int, default=0
Whether to enable probability estimates.
cache_size : float64, default=100
Cache size for gram matrix columns (in megabytes).
max_iter : int (-1 for no limit), default=-1
Stop solver after this many iterations regardless of accuracy
(XXX Currently there is no API to know whether this kicked in.)
random_seed : int, default=0
Seed for the random number generator used for probability estimates.
Returns
-------
target : array of shape (n_samples,), dtype=float64
    Cross-validation prediction for each sample.
"""
cdef svm_parameter param
cdef svm_problem problem
cdef const char *error_msg
if len(sample_weight) == 0:
sample_weight = np.ones(X.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == X.shape[0], (
f"sample_weight and X have incompatible shapes: sample_weight has "
f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
)
if X.shape[0] < n_fold:
raise ValueError("Number of samples is less than number of folds")
# set problem
kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
set_problem(
&problem,
<char*> &X[0, 0],
<char*> &Y[0],
<char*> &sample_weight[0] if sample_weight.size > 0 else NULL,
<intp_t*> X.shape,
kernel_index,
)
if problem.x == NULL:
raise MemoryError("Seems we've run out of memory")
cdef int32_t[::1] class_weight_label = np.arange(
class_weight.shape[0], dtype=np.int32
)
# set parameters
set_parameter(
&param,
svm_type,
kernel_index,
degree,
gamma,
coef0,
nu,
cache_size,
C,
tol,
tol,
shrinking,
probability,
<int> class_weight.shape[0],
<char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char*> &class_weight[0] if class_weight.size > 0 else NULL,
max_iter,
random_seed,
)
error_msg = svm_check_parameter(&problem, &param)
if error_msg:
raise ValueError(error_msg)
cdef float64_t[::1] target
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
try:
target = np.empty((X.shape[0]), dtype=np.float64)
with nogil:
svm_cross_validation(
&problem,
&param,
n_fold,
<double *> &target[0],
&blas_functions,
)
finally:
free(problem.x)
return target.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
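For orientation, a hedged round trip through the low-level binding above; real callers are the sklearn.svm._base estimators, and the data here is made up.

# Illustrative fit/predict round trip (assumed toy data).
import numpy as np
from sklearn.svm import _libsvm

rng = np.random.RandomState(0)
X = np.ascontiguousarray(rng.rand(20, 2))
Y = np.ascontiguousarray((X[:, 0] > 0.5).astype(np.float64))
(support, SV, nSV, sv_coef, intercept,
 probA, probB, fit_status, n_iter) = _libsvm.fit(X, Y, svm_type=0, kernel="rbf")
pred = _libsvm.predict(X, support, SV, nSV, sv_coef, intercept,
                       svm_type=0, kernel="rbf")
print((pred == Y).mean())  # training accuracy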
@@ -0,0 +1,550 @@
import numpy as np
from scipy import sparse
from ..utils._cython_blas cimport _dot
from ..utils._typedefs cimport float64_t, int32_t, intp_t
cdef extern from *:
ctypedef char* const_char_p "const char*"
################################################################################
# Includes
cdef extern from "_svm_cython_blas_helpers.h":
ctypedef double (*dot_func)(int, const double*, int, const double*, int)
cdef struct BlasFunctions:
dot_func dot
cdef extern from "svm.h":
cdef struct svm_csr_node
cdef struct svm_csr_model
cdef struct svm_parameter
cdef struct svm_csr_problem
char *svm_csr_check_parameter(svm_csr_problem *, svm_parameter *)
svm_csr_model *svm_csr_train(svm_csr_problem *, svm_parameter *, int *, BlasFunctions *) nogil
void svm_csr_free_and_destroy_model(svm_csr_model** model_ptr_ptr)
cdef extern from "libsvm_sparse_helper.c":
# this file contains methods for accessing libsvm 'hidden' fields
svm_csr_problem * csr_set_problem (
char *, intp_t *, char *, intp_t *, char *, char *, char *, int)
svm_csr_model *csr_set_model(svm_parameter *param, int nr_class,
char *SV_data, intp_t *SV_indices_dims,
char *SV_indices, intp_t *SV_intptr_dims,
char *SV_intptr,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
svm_parameter *set_parameter (int , int , int , double, double ,
double , double , double , double,
double, int, int, int, char *, char *, int,
int)
void copy_sv_coef (char *, svm_csr_model *)
void copy_n_iter (char *, svm_csr_model *)
void copy_support (char *, svm_csr_model *)
void copy_intercept (char *, svm_csr_model *, intp_t *)
int copy_predict (char *, svm_csr_model *, intp_t *, char *, BlasFunctions *)
int csr_copy_predict_values (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, int nr_class, BlasFunctions *)
int csr_copy_predict (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, BlasFunctions *) nogil
int csr_copy_predict_proba (intp_t *data_size, char *data, intp_t *index_size,
char *index, intp_t *intptr_size, char *size,
svm_csr_model *model, char *dec_values, BlasFunctions *) nogil
int copy_predict_values(char *, svm_csr_model *, intp_t *, char *, int, BlasFunctions *)
int csr_copy_SV (char *values, intp_t *n_indices,
char *indices, intp_t *n_indptr, char *indptr,
svm_csr_model *model, int n_features)
intp_t get_nonzero_SV (svm_csr_model *)
void copy_nSV (char *, svm_csr_model *)
void copy_probA (char *, svm_csr_model *, intp_t *)
void copy_probB (char *, svm_csr_model *, intp_t *)
intp_t get_l (svm_csr_model *)
intp_t get_nr (svm_csr_model *)
int free_problem (svm_csr_problem *)
int free_model (svm_csr_model *)
int free_param (svm_parameter *)
int free_model_SV(svm_csr_model *model)
void set_verbosity(int)
def libsvm_sparse_train(
    int n_features,
    const float64_t[::1] values,
    const int32_t[::1] indices,
    const int32_t[::1] indptr,
    const float64_t[::1] Y,
    int svm_type,
    int kernel_type,
    int degree,
    double gamma,
    double coef0,
    double eps,
    double C,
    const float64_t[::1] class_weight,
    const float64_t[::1] sample_weight,
    double nu,
    double cache_size,
    double p,
    int shrinking,
    int probability,
    int max_iter,
    int random_seed,
):
"""
Wrap svm_train from libsvm using a scipy.sparse.csr matrix
Work in progress.
Parameters
----------
n_features : number of features.
XXX: can we retrieve this from any other parameter?
X : array-like, dtype=float, size=[N, D]
Y : array, dtype=float, size=[N]
target vector
...
Notes
-----
See sklearn.svm.predict for a complete list of parameters.
"""
cdef svm_parameter *param
cdef svm_csr_problem *problem
cdef svm_csr_model *model
cdef const_char_p error_msg
if len(sample_weight) == 0:
sample_weight = np.ones(Y.shape[0], dtype=np.float64)
else:
assert sample_weight.shape[0] == indptr.shape[0] - 1, (
    f"sample_weight and X have incompatible shapes: sample_weight has "
    f"{sample_weight.shape[0]} samples while X has {indptr.shape[0] - 1}"
)
# we should never end up here with a precomputed kernel matrix,
# as this is always dense.
assert(kernel_type != 4)
# set libsvm problem
problem = csr_set_problem(
<char *> &values[0],
<intp_t *> indices.shape,
<char *> &indices[0],
<intp_t *> indptr.shape,
<char *> &indptr[0],
<char *> &Y[0],
<char *> &sample_weight[0],
kernel_type,
)
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
# set parameters
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
cache_size,
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL, max_iter,
random_seed,
)
# check parameters
if (param == NULL or problem == NULL):
raise MemoryError("Seems we've run out of memory")
error_msg = svm_csr_check_parameter(problem, param)
if error_msg:
free_problem(problem)
free_param(param)
raise ValueError(error_msg)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
# call svm_train, this does the real work
cdef int fit_status = 0
with nogil:
model = svm_csr_train(problem, param, &fit_status, &blas_functions)
cdef intp_t SV_len = get_l(model)
cdef intp_t n_class = get_nr(model)
cdef int[::1] n_iter
n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc)
copy_n_iter(<char *> &n_iter[0], model)
# copy model.sv_coef
# we create a new array instead of resizing, otherwise
# it would not erase previous information
cdef float64_t[::1] sv_coef_data
sv_coef_data = np.empty((n_class-1)*SV_len, dtype=np.float64)
copy_sv_coef (<char *> &sv_coef_data[0] if sv_coef_data.size > 0 else NULL, model)
cdef int32_t[::1] support
support = np.empty(SV_len, dtype=np.int32)
copy_support(<char *> &support[0] if support.size > 0 else NULL, model)
# copy model.rho into the intercept
# the intercept is just model.rho but with sign changed
cdef float64_t[::1] intercept
intercept = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
copy_intercept (<char *> &intercept[0], model, <intp_t *> intercept.shape)
# copy model.SV
# we erase any previous information in SV
# TODO: custom kernel
cdef intp_t nonzero_SV
nonzero_SV = get_nonzero_SV (model)
cdef float64_t[::1] SV_data
cdef int32_t[::1] SV_indices, SV_indptr
SV_data = np.empty(nonzero_SV, dtype=np.float64)
SV_indices = np.empty(nonzero_SV, dtype=np.int32)
SV_indptr = np.empty(<intp_t>SV_len + 1, dtype=np.int32)
csr_copy_SV(
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
model,
n_features,
)
support_vectors_ = sparse.csr_matrix(
(SV_data, SV_indices, SV_indptr), (SV_len, n_features)
)
# copy model.nSV
# TODO: do only in classification
cdef int32_t[::1] n_class_SV
n_class_SV = np.empty(n_class, dtype=np.int32)
copy_nSV(<char *> &n_class_SV[0], model)
# copy probabilities
cdef float64_t[::1] probA, probB
if probability != 0:
if svm_type < 2: # SVC and NuSVC
probA = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
probB = np.empty(n_class*(n_class-1)//2, dtype=np.float64)
copy_probB(<char *> &probB[0], model, <intp_t *> probB.shape)
else:
probA = np.empty(1, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
copy_probA(<char *> &probA[0], model, <intp_t *> probA.shape)
else:
probA = np.empty(0, dtype=np.float64)
probB = np.empty(0, dtype=np.float64)
svm_csr_free_and_destroy_model (&model)
free_problem(problem)
free_param(param)
return (
support.base,
support_vectors_,
sv_coef_data.base,
intercept.base,
n_class_SV.base,
probA.base,
probB.base,
fit_status,
n_iter.base,
)
def libsvm_sparse_predict(
    const float64_t[::1] T_data,
    const int32_t[::1] T_indices,
    const int32_t[::1] T_indptr,
    const float64_t[::1] SV_data,
    const int32_t[::1] SV_indices,
    const int32_t[::1] SV_indptr,
    const float64_t[::1] sv_coef,
    const float64_t[::1] intercept,
    int svm_type,
    int kernel_type,
    int degree,
    double gamma,
    double coef0,
    double eps,
    double C,
    const float64_t[:] class_weight,
    double nu,
    double p,
    int shrinking,
    int probability,
    const int32_t[::1] nSV,
    const float64_t[::1] probA,
    const float64_t[::1] probB,
):
"""
Predict values T given a model.
For speed, all real work is done at the C level in function
copy_predict (libsvm_helper.c).
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
See sklearn.svm.predict for a complete list of parameters.
Parameters
----------
X : array-like, dtype=float
Y : array
target vector
Returns
-------
dec_values : array
predicted values.
"""
cdef float64_t[::1] dec_values
cdef svm_parameter *param
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
cdef int rv
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1, # random seed has no effect on predict either
)
model = csr_set_model(
param, <int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *>SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
# TODO: use check_model
dec_values = np.empty(T_indptr.shape[0]-1)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
with nogil:
rv = csr_copy_predict(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def libsvm_sparse_predict_proba(
const float64_t[::1] T_data,
const int32_t[::1] T_indices,
const int32_t[::1] T_indptr,
const float64_t[::1] SV_data,
const int32_t[::1] SV_indices,
const int32_t[::1] SV_indptr,
const float64_t[::1] sv_coef,
const float64_t[::1] intercept,
int svm_type,
int kernel_type,
int degree,
double gamma,
double coef0,
double eps,
double C,
const float64_t[:] class_weight,
double nu,
double p,
int shrinking,
int probability,
const int32_t[::1] nSV,
const float64_t[::1] probA,
const float64_t[::1] probB,
):
"""
Predict values T given a model.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter *param
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1, # random seed has no effect on predict either
)
model = csr_set_model(
param,
<int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
# TODO: use check_model
cdef intp_t n_class = get_nr(model)
cdef int rv
dec_values = np.empty((T_indptr.shape[0]-1, n_class), dtype=np.float64)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
with nogil:
rv = csr_copy_predict_proba(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0, 0],
&blas_functions,
)
if rv < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def libsvm_sparse_decision_function(
const float64_t[::1] T_data,
const int32_t[::1] T_indices,
const int32_t[::1] T_indptr,
const float64_t[::1] SV_data,
const int32_t[::1] SV_indices,
const int32_t[::1] SV_indptr,
const float64_t[::1] sv_coef,
const float64_t[::1] intercept,
int svm_type,
int kernel_type,
int degree,
double gamma,
double coef0,
double eps,
double C,
const float64_t[:] class_weight,
double nu,
double p,
int shrinking,
int probability,
const int32_t[::1] nSV,
const float64_t[::1] probA,
const float64_t[::1] probB,
):
"""
Predict margin (libsvm name for this is predict_values)
We have to reconstruct model and parameters to make sure we stay
in sync with the python object.
"""
cdef float64_t[:, ::1] dec_values
cdef svm_parameter *param
cdef intp_t n_class
cdef svm_csr_model *model
cdef int32_t[::1] \
class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
param = set_parameter(
svm_type,
kernel_type,
degree,
gamma,
coef0,
nu,
100.0, # cache size has no effect on predict
C,
eps,
p,
shrinking,
probability,
<int> class_weight.shape[0],
<char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
<char *> &class_weight[0] if class_weight.size > 0 else NULL,
-1,
-1,
)
model = csr_set_model(
param,
<int> nSV.shape[0],
<char *> &SV_data[0] if SV_data.size > 0 else NULL,
<intp_t *> SV_indices.shape,
<char *> &SV_indices[0] if SV_indices.size > 0 else NULL,
<intp_t *> SV_indptr.shape,
<char *> &SV_indptr[0] if SV_indptr.size > 0 else NULL,
<char *> &sv_coef[0] if sv_coef.size > 0 else NULL,
<char *> &intercept[0],
<char *> &nSV[0],
<char *> &probA[0] if probA.size > 0 else NULL,
<char *> &probB[0] if probB.size > 0 else NULL,
)
if svm_type > 1:
n_class = 1
else:
n_class = get_nr(model)
n_class = n_class * (n_class - 1) // 2
dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64)
cdef BlasFunctions blas_functions
blas_functions.dot = _dot[double]
if csr_copy_predict_values(
<intp_t *> T_data.shape,
<char *> &T_data[0],
<intp_t *> T_indices.shape,
<char *> &T_indices[0],
<intp_t *> T_indptr.shape,
<char *> &T_indptr[0],
model,
<char *> &dec_values[0, 0],
n_class,
&blas_functions,
) < 0:
raise MemoryError("We've run out of memory")
# free model and param
free_model_SV(model)
free_model(model)
free_param(param)
return dec_values.base
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of libsvm library
"""
set_verbosity(verbosity)
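For orientation, a hedged sketch of training through the sparse binding above; scikit-learn's estimators call this internally, and every value below is an illustrative assumption (kernel_type 2 is libsvm's rbf code).

# Illustrative call into the sparse training binding (assumed data).
import numpy as np
from scipy import sparse
from sklearn.svm import _libsvm_sparse

X = sparse.random(30, 5, density=0.5, format="csr",
                  dtype=np.float64, random_state=0)
Y = np.asarray(np.random.RandomState(0).randint(0, 2, 30), dtype=np.float64)
(support, SV, sv_coef, intercept, nSV,
 probA, probB, fit_status, n_iter) = _libsvm_sparse.libsvm_sparse_train(
    X.shape[1],                      # n_features
    X.data,                          # CSR values (float64)
    X.indices.astype(np.int32),     # CSR column indices
    X.indptr.astype(np.int32),      # CSR row pointers
    Y,
    0, 2, 3, 0.5, 0.0,              # svm_type, kernel_type, degree, gamma, coef0
    1e-3, 1.0,                      # eps, C
    np.empty(0), np.empty(0),       # class_weight, sample_weight
    0.5, 100.0, 0.1,                # nu, cache_size, p
    1, 0, -1, 0,                    # shrinking, probability, max_iter, random_seed
)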
@@ -0,0 +1,13 @@
"""Wrapper for newrand.h"""
cdef extern from "newrand.h":
void set_seed(unsigned int)
unsigned int bounded_rand_int(unsigned int)
def set_seed_wrap(unsigned int custom_seed):
set_seed(custom_seed)
def bounded_rand_int_wrap(unsigned int range_):
return bounded_rand_int(range_)
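These two wrappers are how scikit-learn seeds the bounded RNG used by libsvm/liblinear for reproducible probability estimates; a hedged one-liner for orientation:

# Illustrative use of the newrand wrappers.
from sklearn.svm import _newrand
_newrand.set_seed_wrap(42)
print(_newrand.bounded_rand_int_wrap(10))  # uniform integer in [0, 10)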
@@ -0,0 +1,53 @@
newrand_include = include_directories('src/newrand')
libsvm_include = include_directories('src/libsvm')
liblinear_include = include_directories('src/liblinear')
_newrand = py.extension_module(
'_newrand',
'_newrand.pyx',
override_options: ['cython_language=cpp'],
include_directories: [newrand_include],
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
libsvm_skl = static_library(
'libsvm-skl',
['src/libsvm/libsvm_template.cpp'],
)
py.extension_module(
'_libsvm',
['_libsvm.pyx', utils_cython_tree],
include_directories: [newrand_include, libsvm_include],
link_with: libsvm_skl,
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
py.extension_module(
'_libsvm_sparse',
['_libsvm_sparse.pyx', utils_cython_tree],
include_directories: [newrand_include, libsvm_include],
link_with: libsvm_skl,
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
liblinear_skl = static_library(
'liblinear-skl',
['src/liblinear/linear.cpp', 'src/liblinear/tron.cpp'],
)
py.extension_module(
'_liblinear',
['_liblinear.pyx', utils_cython_tree],
include_directories: [newrand_include, liblinear_include],
link_with: [liblinear_skl],
cython_args: cython_args,
subdir: 'sklearn/svm',
install: true
)
@@ -0,0 +1,31 @@
Copyright (c) 2007-2014 The LIBLINEAR Project.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither name of copyright holders nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,16 @@
#ifndef _CYTHON_BLAS_HELPERS_H
#define _CYTHON_BLAS_HELPERS_H
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef void (*axpy_func)(int, double, const double*, int, double*, int);
typedef void (*scal_func)(int, double, const double*, int);
typedef double (*nrm2_func)(int, const double*, int);
typedef struct BlasFunctions{
dot_func dot;
axpy_func axpy;
scal_func scal;
nrm2_func nrm2;
} BlasFunctions;
#endif
@@ -0,0 +1,236 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "linear.h"
/*
* Convert matrix to sparse representation suitable for liblinear. x is
* expected to be an array of length n_samples*n_features.
*
* Whether the matrix is densely or sparsely populated, the fastest way to
* convert it to liblinear's sparse format is to calculate the amount of memory
* needed and allocate a single big block.
*
* Special care must be taken with indices, since liblinear indices start at 1
* and not at 0.
*
* If bias is > 0, we append an item at the end.
*/
static struct feature_node **dense_to_sparse(char *x, int double_precision,
int n_samples, int n_features, int n_nonzero, double bias)
{
float *x32 = (float *)x;
double *x64 = (double *)x;
struct feature_node **sparse;
int i, j; /* sample and feature indices */
struct feature_node *T; /* pointer to the top of the stack */
int have_bias = (bias > 0);
sparse = malloc (n_samples * sizeof(struct feature_node *));
if (sparse == NULL)
return NULL;
n_nonzero += (have_bias+1) * n_samples;
T = malloc (n_nonzero * sizeof(struct feature_node));
if (T == NULL) {
free(sparse);
return NULL;
}
for (i=0; i<n_samples; ++i) {
sparse[i] = T;
for (j=1; j<=n_features; ++j) {
if (double_precision) {
if (*x64 != 0) {
T->value = *x64;
T->index = j;
++ T;
}
++ x64; /* go to next element */
} else {
if (*x32 != 0) {
T->value = *x32;
T->index = j;
++ T;
}
++ x32; /* go to next element */
}
}
/* set bias element */
if (have_bias) {
T->value = bias;
T->index = j;
++ T;
}
/* set sentinel */
T->index = -1;
++ T;
}
return sparse;
}
/*
* Convert scipy.sparse.csr to liblinear's sparse data structure
*/
static struct feature_node **csr_to_sparse(char *x, int double_precision,
int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
double bias)
{
float *x32 = (float *)x;
double *x64 = (double *)x;
struct feature_node **sparse;
int i, j=0, k=0, n;
struct feature_node *T;
int have_bias = (bias > 0);
sparse = malloc (n_samples * sizeof(struct feature_node *));
if (sparse == NULL)
return NULL;
n_nonzero += (have_bias+1) * n_samples;
T = malloc (n_nonzero * sizeof(struct feature_node));
if (T == NULL) {
free(sparse);
return NULL;
}
for (i=0; i<n_samples; ++i) {
sparse[i] = T;
n = indptr[i+1] - indptr[i]; /* count elements in row i */
for (j=0; j<n; ++j) {
T->value = double_precision ? x64[k] : x32[k];
T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
++T;
++k;
}
if (have_bias) {
T->value = bias;
T->index = n_features + 1;
++T;
++j;
}
/* set sentinel */
T->index = -1;
++T;
}
return sparse;
}
struct problem * set_problem(char *X, int double_precision_X, int n_samples,
int n_features, int n_nonzero, double bias, char* sample_weight,
char *Y)
{
struct problem *problem;
/* not performant but simple */
problem = malloc(sizeof(struct problem));
if (problem == NULL) return NULL;
problem->l = n_samples;
problem->n = n_features + (bias > 0);
problem->y = (double *) Y;
problem->W = (double *) sample_weight;
problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
n_nonzero, bias);
problem->bias = bias;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
struct problem * csr_set_problem (char *X, int double_precision_X,
char *indices, char *indptr, int n_samples, int n_features,
int n_nonzero, double bias, char *sample_weight, char *Y)
{
struct problem *problem;
problem = malloc (sizeof (struct problem));
if (problem == NULL) return NULL;
problem->l = n_samples;
problem->n = n_features + (bias > 0);
problem->y = (double *) Y;
problem->W = (double *) sample_weight;
problem->x = csr_to_sparse(X, double_precision_X, (int *) indices,
(int *) indptr, n_samples, n_features, n_nonzero, bias);
problem->bias = bias;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
/* Create a parameter struct and return it */
struct parameter *set_parameter(int solver_type, double eps, double C,
Py_ssize_t nr_weight, char *weight_label,
char *weight, int max_iter, unsigned seed,
double epsilon)
{
struct parameter *param = malloc(sizeof(struct parameter));
if (param == NULL)
return NULL;
set_seed(seed);
param->solver_type = solver_type;
param->eps = eps;
param->C = C;
param->p = epsilon; // epsilon for epsilon-SVR
param->nr_weight = (int) nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->max_iter = max_iter;
return param;
}
void copy_w(void *data, struct model *model, int len)
{
memcpy(data, model->w, len * sizeof(double));
}
double get_bias(struct model *model)
{
return model->bias;
}
void free_problem(struct problem *problem)
{
free(problem->x[0]);
free(problem->x);
free(problem);
}
void free_parameter(struct parameter *param)
{
free(param);
}
/* rely on built-in facility to control verbose output */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s, stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
set_print_string_function(&print_string_stdout);
else
set_print_string_function(&print_null);
}
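To make the conversion conventions above concrete, a hedged Python model (illustrative only, not part of the build) of what csr_to_sparse emits per row: 1-based feature indices, an optional bias feature at index n_features + 1, and a -1 sentinel closing the row.

# Python sketch of liblinear's per-row sparse node layout.
def csr_row_to_nodes(data, indices, indptr, row, n_features, bias):
    nodes = [(int(indices[k]) + 1, float(data[k]))   # liblinear is 1-based
             for k in range(indptr[row], indptr[row + 1])]
    if bias > 0:
        nodes.append((n_features + 1, bias))         # synthetic bias feature
    nodes.append((-1, 0.0))                          # sentinel terminates the row
    return nodes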
File diff suppressed because it is too large.
@@ -0,0 +1,87 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "_cython_blas_helpers.h"
struct feature_node
{
int index;
double value;
};
struct problem
{
int l, n;
double *y;
struct feature_node **x;
double bias; /* < 0 if no bias term */
double *W;
};
enum /* solver_type */
{
	L2R_LR,
	L2R_L2LOSS_SVC_DUAL,
	L2R_L2LOSS_SVC,
	L2R_L1LOSS_SVC_DUAL,
	MCSVM_CS,
	L1R_L2LOSS_SVC,
	L1R_LR,
	L2R_LR_DUAL,
	L2R_L2LOSS_SVR = 11,
	L2R_L2LOSS_SVR_DUAL,
	L2R_L1LOSS_SVR_DUAL
};
struct parameter
{
int solver_type;
/* these are for training only */
double eps; /* stopping criteria */
double C;
int nr_weight;
int *weight_label;
double* weight;
int max_iter;
double p;
};
struct model
{
struct parameter param;
int nr_class; /* number of classes */
int nr_feature;
double *w;
int *label; /* label of each class */
double bias;
int *n_iter; /* no. of iterations of each class */
};
void set_seed(unsigned seed);
struct model* train(const struct problem *prob, const struct parameter *param, BlasFunctions *blas_functions);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);
int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);
void get_n_iter(const struct model *model_, int* n_iter);
#if 0
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
double get_decfun_bias(const struct model *model_, int label_idx);
#endif
void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);
const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
int check_regression_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));
#ifdef __cplusplus
}
#endif
#endif /* _LIBLINEAR_H */
@@ -0,0 +1,223 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "tron.h"
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
static void default_print(const char *buf)
{
fputs(buf,stdout);
fflush(stdout);
}
void TRON::info(const char *fmt,...)
{
char buf[BUFSIZ];
va_list ap;
va_start(ap,fmt);
	vsnprintf(buf,BUFSIZ,fmt,ap); /* bounded write: avoid overflowing buf */
va_end(ap);
(*tron_print_string)(buf);
}
TRON::TRON(const function *fun_obj, double eps, int max_iter, BlasFunctions *blas)
{
this->fun_obj=const_cast<function *>(fun_obj);
this->eps=eps;
this->max_iter=max_iter;
this->blas=blas;
tron_print_string = default_print;
}
TRON::~TRON()
{
}
int TRON::tron(double *w)
{
// Parameters for updating the iterates.
double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
// Parameters for updating the trust region size delta.
double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
int n = fun_obj->get_nr_variable();
int i, cg_iter;
double delta, snorm;
double alpha, f, fnew, prered, actred, gs;
int search = 1, iter = 1, inc = 1;
double *s = new double[n];
double *r = new double[n];
double *w_new = new double[n];
double *g = new double[n];
for (i=0; i<n; i++)
w[i] = 0;
f = fun_obj->fun(w);
fun_obj->grad(w, g);
delta = blas->nrm2(n, g, inc);
double gnorm1 = delta;
double gnorm = gnorm1;
if (gnorm <= eps*gnorm1)
search = 0;
iter = 1;
while (iter <= max_iter && search)
{
cg_iter = trcg(delta, g, s, r);
memcpy(w_new, w, sizeof(double)*n);
blas->axpy(n, 1.0, s, inc, w_new, inc);
gs = blas->dot(n, g, inc, s, inc);
prered = -0.5*(gs - blas->dot(n, s, inc, r, inc));
fnew = fun_obj->fun(w_new);
// Compute the actual reduction.
actred = f - fnew;
// On the first iteration, adjust the initial step bound.
snorm = blas->nrm2(n, s, inc);
if (iter == 1)
delta = min(delta, snorm);
// Compute prediction alpha*snorm of the step.
if (fnew - f - gs <= 0)
alpha = sigma3;
else
alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
// Update the trust region bound according to the ratio of actual to predicted reduction.
if (actred < eta0*prered)
delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
else if (actred < eta1*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
else if (actred < eta2*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
else
delta = max(delta, min(alpha*snorm, sigma3*delta));
info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
if (actred > eta0*prered)
{
iter++;
memcpy(w, w_new, sizeof(double)*n);
f = fnew;
fun_obj->grad(w, g);
gnorm = blas->nrm2(n, g, inc);
if (gnorm <= eps*gnorm1)
break;
}
if (f < -1.0e+32)
{
info("WARNING: f < -1.0e+32\n");
break;
}
if (fabs(actred) <= 0 && prered <= 0)
{
info("WARNING: actred and prered <= 0\n");
break;
}
if (fabs(actred) <= 1.0e-12*fabs(f) &&
fabs(prered) <= 1.0e-12*fabs(f))
{
info("WARNING: actred and prered too small\n");
break;
}
}
delete[] g;
delete[] r;
delete[] w_new;
delete[] s;
return --iter;
}
int TRON::trcg(double delta, double *g, double *s, double *r)
{
int i, inc = 1;
int n = fun_obj->get_nr_variable();
double *d = new double[n];
double *Hd = new double[n];
double rTr, rnewTrnew, alpha, beta, cgtol;
for (i=0; i<n; i++)
{
s[i] = 0;
r[i] = -g[i];
d[i] = r[i];
}
cgtol = 0.1 * blas->nrm2(n, g, inc);
int cg_iter = 0;
rTr = blas->dot(n, r, inc, r, inc);
while (1)
{
if (blas->nrm2(n, r, inc) <= cgtol)
break;
cg_iter++;
fun_obj->Hv(d, Hd);
alpha = rTr / blas->dot(n, d, inc, Hd, inc);
blas->axpy(n, alpha, d, inc, s, inc);
if (blas->nrm2(n, s, inc) > delta)
{
info("cg reaches trust region boundary\n");
alpha = -alpha;
blas->axpy(n, alpha, d, inc, s, inc);
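			// Solve ||s + alpha*d|| = delta for alpha >= 0, i.e. the quadratic
			// dtd*alpha^2 + 2*std*alpha + (sts - dsq) = 0; the two branches
			// below pick the numerically stable form of the positive root.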
double std = blas->dot(n, s, inc, d, inc);
double sts = blas->dot(n, s, inc, s, inc);
double dtd = blas->dot(n, d, inc, d, inc);
double dsq = delta*delta;
double rad = sqrt(std*std + dtd*(dsq-sts));
if (std >= 0)
alpha = (dsq - sts)/(std + rad);
else
alpha = (rad - std)/dtd;
blas->axpy(n, alpha, d, inc, s, inc);
alpha = -alpha;
blas->axpy(n, alpha, Hd, inc, r, inc);
break;
}
alpha = -alpha;
blas->axpy(n, alpha, Hd, inc, r, inc);
rnewTrnew = blas->dot(n, r, inc, r, inc);
beta = rnewTrnew/rTr;
blas->scal(n, beta, d, inc);
blas->axpy(n, 1.0, r, inc, d, inc);
rTr = rnewTrnew;
}
delete[] d;
delete[] Hd;
return(cg_iter);
}
double TRON::norm_inf(int n, double *x)
{
double dmax = fabs(x[0]);
for (int i=1; i<n; i++)
if (fabs(x[i]) >= dmax)
dmax = fabs(x[i]);
return(dmax);
}
void TRON::set_print_string(void (*print_string) (const char *buf))
{
tron_print_string = print_string;
}
@@ -0,0 +1,37 @@
#ifndef _TRON_H
#define _TRON_H
#include "_cython_blas_helpers.h"
class function
{
public:
virtual double fun(double *w) = 0 ;
virtual void grad(double *w, double *g) = 0 ;
virtual void Hv(double *s, double *Hs) = 0 ;
virtual int get_nr_variable(void) = 0 ;
virtual ~function(void){}
};
class TRON
{
public:
TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0);
~TRON();
int tron(double *w);
void set_print_string(void (*i_print) (const char *buf));
private:
int trcg(double delta, double *g, double *s, double *r);
double norm_inf(int n, double *x);
double eps;
int max_iter;
function *fun_obj;
BlasFunctions *blas;
void info(const char *fmt,...);
void (*tron_print_string)(const char *buf);
};
#endif
@@ -0,0 +1,11 @@
Changes to Libsvm
This is here mainly as a checklist for incorporating new versions of libsvm.
* Add copyright to files svm.cpp and svm.h
* Add random_seed support and call to srand in fit function
* Improved random number generator (fix on Windows, enhancement on other
  platforms). See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
* Invoke the SciPy BLAS API for the svm kernel function to improve performance,
  with a speedup of 1.5x to 2x for dense data only. See <https://github.com/scikit-learn/scikit-learn/pull/16530>
* Expose the number of iterations run in optimization. See <https://github.com/scikit-learn/scikit-learn/pull/21408>
The changes made with respect to upstream are detailed in the header comment of svm.cpp.
@@ -0,0 +1,9 @@
#ifndef _SVM_CYTHON_BLAS_HELPERS_H
#define _SVM_CYTHON_BLAS_HELPERS_H
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef struct BlasFunctions{
dot_func dot;
} BlasFunctions;
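/*
 * Usage sketch (illustrative only, not part of the build): the caller is
 * expected to fill the struct with a BLAS dot implementation and pass it
 * down to the svm routines, e.g.
 *
 *   double my_dot(int n, const double *x, int incx,
 *                 const double *y, int incy);  // hypothetical helper
 *   BlasFunctions blas;
 *   blas.dot = my_dot;
 *
 * `my_dot` is a placeholder name, not an actual API.
 */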
#endif
@@ -0,0 +1,425 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Some helper methods for libsvm bindings.
*
* We need to access from python some parameters stored in svm_model
* but libsvm does not expose this structure, so we define it here
* along some utilities to convert from numpy arrays.
*
* License: BSD 3 clause
*
* Author: 2010 Fabian Pedregosa <fabian.pedregosa@inria.fr>
*/
/*
* Convert matrix to sparse representation suitable for libsvm. x is
* expected to be an array of length nrow*ncol.
*
* Typically the matrix will be dense, so we speed up the routine for
* this case. We create a temporary array temp that collects non-zero
* elements and after we just memcpy that to the proper array.
*
 * Special care must be taken with indices, since libsvm indices start
* at 1 and not at 0.
*
* Strictly speaking, the C standard does not require that structs are
 * contiguous, but in practice it's a reasonable assumption.
*
*/
struct svm_node *dense_to_libsvm (double *x, Py_ssize_t *dims)
{
struct svm_node *node;
Py_ssize_t len_row = dims[1];
double *tx = x;
int i;
node = malloc (dims[0] * sizeof(struct svm_node));
if (node == NULL) return NULL;
for (i=0; i<dims[0]; ++i) {
node[i].values = tx;
node[i].dim = (int) len_row;
node[i].ind = i; /* only used if kernel=precomputed, but not
too much overhead */
tx += len_row;
}
return node;
}
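/*
 * Illustrative sketch (not compiled): for a dense 2x3 row-major matrix
 * x = {1,0,2, 0,3,0} with dims = {2,3}, the function yields
 *   node[0] = {dim=3, ind=0, values=&x[0]}
 *   node[1] = {dim=3, ind=1, values=&x[3]}
 * No per-element copy is made; each node points into the original row,
 * which is why this routine is cheap for dense input.
 */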
/*
* Fill an svm_parameter struct.
*/
void set_parameter(struct svm_parameter *param, int svm_type, int kernel_type, int degree,
double gamma, double coef0, double nu, double cache_size, double C,
double eps, double p, int shrinking, int probability, int nr_weight,
char *weight_label, char *weight, int max_iter, int random_seed)
{
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->coef0 = coef0;
param->nu = nu;
param->cache_size = cache_size;
param->C = C;
param->eps = eps;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
param->nr_weight = nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->gamma = gamma;
param->max_iter = max_iter;
param->random_seed = random_seed;
}
/*
* Fill an svm_problem struct. problem->x will be malloc'd.
*/
void set_problem(struct svm_problem *problem, char *X, char *Y, char *sample_weight, Py_ssize_t *dims, int kernel_type)
{
if (problem == NULL) return;
problem->l = (int) dims[0]; /* number of samples */
problem->y = (double *) Y;
problem->x = dense_to_libsvm((double *) X, dims); /* implicit call to malloc */
problem->W = (double *) sample_weight;
}
/*
* Create and return an instance of svm_model.
*
* The copy of model->sv_coef should be straightforward, but
* unfortunately to represent a matrix numpy and libsvm use different
* approaches, so it requires some iteration.
*
* Possible issue: on 64 bits, the number of columns that numpy can
* store is a long, but libsvm enforces this number (model->l) to be
* an int, so we might have numpy matrices that do not fit into libsvm's
* data structure.
*
*/
struct svm_model *set_model(struct svm_parameter *param, int nr_class,
char *SV, Py_ssize_t *SV_dims,
char *support, Py_ssize_t *support_dims,
Py_ssize_t *sv_coef_strides,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
{
struct svm_model *model;
double *dsv_coef = (double *) sv_coef;
int i, m;
m = nr_class * (nr_class-1)/2;
if ((model = malloc(sizeof(struct svm_model))) == NULL)
goto model_error;
if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
goto nsv_error;
if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
goto label_error;
if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
goto sv_coef_error;
if ((model->rho = malloc( m * sizeof(double))) == NULL)
goto rho_error;
// This is only allocated in dynamic memory while training.
model->n_iter = NULL;
model->nr_class = nr_class;
model->param = *param;
model->l = (int) support_dims[0];
if (param->kernel_type == PRECOMPUTED) {
if ((model->SV = malloc ((model->l) * sizeof(struct svm_node))) == NULL)
goto SV_error;
for (i=0; i<model->l; ++i) {
model->SV[i].ind = ((int *) support)[i];
model->SV[i].values = NULL;
}
} else {
model->SV = dense_to_libsvm((double *) SV, SV_dims);
}
/*
 * regression and one-class do not use nSV, label.
* TODO: does this provoke memory leaks (we just malloc'ed them)?
*/
if (param->svm_type < 2) {
memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
for(i=0; i < model->nr_class; i++)
model->label[i] = i;
}
for (i=0; i < model->nr_class-1; i++) {
model->sv_coef[i] = dsv_coef + i*(model->l);
}
for (i=0; i<m; ++i) {
(model->rho)[i] = -((double *) rho)[i];
}
/*
* just to avoid segfaults, these features are not wrapped but
* svm_destroy_model will try to free them.
*/
if (param->probability) {
if ((model->probA = malloc(m * sizeof(double))) == NULL)
goto probA_error;
memcpy(model->probA, probA, m * sizeof(double));
if ((model->probB = malloc(m * sizeof(double))) == NULL)
goto probB_error;
memcpy(model->probB, probB, m * sizeof(double));
} else {
model->probA = NULL;
model->probB = NULL;
}
/* We'll free SV ourselves */
model->free_sv = 0;
return model;
probB_error:
free(model->probA);
probA_error:
free(model->SV);
SV_error:
free(model->rho);
rho_error:
free(model->sv_coef);
sv_coef_error:
free(model->label);
label_error:
free(model->nSV);
nsv_error:
free(model);
model_error:
return NULL;
}
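/*
 * Illustrative sketch (not compiled): with nr_class = 3 and l = 4 support
 * vectors, the flat numpy buffer of dual coefficients has length
 * (3-1)*4 = 8 and is exposed to libsvm as nr_class-1 row pointers:
 *   model->sv_coef[0] = dsv_coef + 0
 *   model->sv_coef[1] = dsv_coef + 4
 * No coefficient data is copied; only the small array of row pointers is
 * allocated (see the loop over sv_coef in set_model above).
 */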
/*
* Get the number of support vectors in a model.
*/
Py_ssize_t get_l(struct svm_model *model)
{
return (Py_ssize_t) model->l;
}
/*
* Get the number of classes in a model, = 2 in regression/one class
* svm.
*/
Py_ssize_t get_nr(struct svm_model *model)
{
return (Py_ssize_t) model->nr_class;
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_model *model)
{
const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
 * Some helpers to convert from libsvm sparse data structures.
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_model *model)
{
int i, len = model->nr_class-1;
double *temp = (double *) data;
for(i=0; i<len; ++i) {
memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
temp += model->l;
}
}
void copy_intercept(char *data, struct svm_model *model, Py_ssize_t *dims)
{
/* intercept = -rho */
Py_ssize_t i, n = dims[0];
double t, *ddata = (double *) data;
for (i=0; i<n; ++i) {
t = model->rho[i];
/* we do this to avoid ugly -0.0 */
*ddata = (t != 0) ? -t : 0;
++ddata;
}
}
/*
* This is a bit more complex since SV are stored as sparse
* structures, so we have to do the conversion on the fly and also
* iterate fast over data.
*/
void copy_SV(char *data, struct svm_model *model, Py_ssize_t *dims)
{
int i, n = model->l;
double *tdata = (double *) data;
int dim = model->SV[0].dim;
for (i=0; i<n; ++i) {
memcpy (tdata, model->SV[i].values, dim * sizeof(double));
tdata += dim;
}
}
void copy_support (char *data, struct svm_model *model)
{
memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
 * copy svm_model.nSV, an array with the number of SV for each class;
 * it will be NULL in the case of SVR, OneClass
*/
void copy_nSV(char *data, struct svm_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
void copy_probA(char *data, struct svm_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probA, dims[0] * sizeof(double));
}
void copy_probB(char *data, struct svm_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Predict using model.
*
* It will return -1 if we run out of memory.
*/
int copy_predict(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
char *dec_values, BlasFunctions *blas_functions)
{
double *t = (double *) dec_values;
struct svm_node *predict_nodes;
Py_ssize_t i;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<predict_dims[0]; ++i) {
*t = svm_predict(model, &predict_nodes[i], blas_functions);
++t;
}
free(predict_nodes);
return 0;
}
int copy_predict_values(char *predict, struct svm_model *model,
Py_ssize_t *predict_dims, char *dec_values, int nr_class, BlasFunctions *blas_functions)
{
Py_ssize_t i;
struct svm_node *predict_nodes;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<predict_dims[0]; ++i) {
svm_predict_values(model, &predict_nodes[i],
((double *) dec_values) + i*nr_class,
blas_functions);
}
free(predict_nodes);
return 0;
}
int copy_predict_proba(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
char *dec_values, BlasFunctions *blas_functions)
{
Py_ssize_t i, n, m;
struct svm_node *predict_nodes;
n = predict_dims[0];
m = (Py_ssize_t) model->nr_class;
predict_nodes = dense_to_libsvm((double *) predict, predict_dims);
if (predict_nodes == NULL)
return -1;
for(i=0; i<n; ++i) {
svm_predict_probability(model, &predict_nodes[i],
((double *) dec_values) + i*m,
blas_functions);
}
free(predict_nodes);
return 0;
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
int free_model(struct svm_model *model)
{
/* like svm_free_and_destroy_model, but does not free sv_coef[i] */
if (model == NULL) return -1;
free(model->SV);
/* We don't free sv_ind and n_iter, since we did not create them in
set_model */
/* free(model->sv_ind);
* free(model->n_iter);
*/
free(model->sv_coef);
free(model->rho);
free(model->label);
free(model->probA);
free(model->probB);
free(model->nSV);
free(model);
return 0;
}
int free_param(struct svm_parameter *param)
{
if (param == NULL) return -1;
free(param);
return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s,stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
svm_set_print_string_function(&print_string_stdout);
else
svm_set_print_string_function(&print_null);
}
@@ -0,0 +1,472 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Convert scipy.sparse.csr to libsvm's sparse data structure
*/
struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, int n_samples)
{
struct svm_csr_node **sparse, *temp;
int i, j=0, k=0, n;
sparse = malloc (n_samples * sizeof(struct svm_csr_node *));
if (sparse == NULL)
return NULL;
for (i=0; i<n_samples; ++i) {
n = indptr[i+1] - indptr[i]; /* count elements in row i */
temp = malloc ((n+1) * sizeof(struct svm_csr_node));
if (temp == NULL) {
for (j=0; j<i; j++)
free(sparse[j]);
free(sparse);
return NULL;
}
for (j=0; j<n; ++j) {
temp[j].value = values[k];
temp[j].index = indices[k] + 1; /* libsvm uses 1-based indexing */
++k;
}
/* set sentinel */
temp[n].index = -1;
sparse[i] = temp;
}
return sparse;
}
struct svm_parameter * set_parameter(int svm_type, int kernel_type, int degree,
double gamma, double coef0, double nu, double cache_size, double C,
double eps, double p, int shrinking, int probability, int nr_weight,
char *weight_label, char *weight, int max_iter, int random_seed)
{
struct svm_parameter *param;
param = malloc(sizeof(struct svm_parameter));
if (param == NULL) return NULL;
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->coef0 = coef0;
param->nu = nu;
param->cache_size = cache_size;
param->C = C;
param->eps = eps;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
param->nr_weight = nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->gamma = gamma;
param->max_iter = max_iter;
param->random_seed = random_seed;
return param;
}
/*
 * Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix.
 * It is up to the user to free the resulting structure.
*
* TODO: precomputed kernel.
*/
struct svm_csr_problem * csr_set_problem (char *values, Py_ssize_t *n_indices,
char *indices, Py_ssize_t *n_indptr, char *indptr, char *Y,
char *sample_weight, int kernel_type) {
struct svm_csr_problem *problem;
problem = malloc (sizeof (struct svm_csr_problem));
if (problem == NULL) return NULL;
problem->l = (int) n_indptr[0] - 1;
problem->y = (double *) Y;
problem->x = csr_to_libsvm((double *) values, (int *) indices,
(int *) indptr, problem->l);
/* should be removed once we implement weighted samples */
problem->W = (double *) sample_weight;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class,
char *SV_data, Py_ssize_t *SV_indices_dims,
char *SV_indices, Py_ssize_t *SV_indptr_dims,
char *SV_intptr,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
{
struct svm_csr_model *model;
double *dsv_coef = (double *) sv_coef;
int i, m;
m = nr_class * (nr_class-1)/2;
if ((model = malloc(sizeof(struct svm_csr_model))) == NULL)
goto model_error;
if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
goto nsv_error;
if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
goto label_error;
if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
goto sv_coef_error;
if ((model->rho = malloc( m * sizeof(double))) == NULL)
goto rho_error;
// This is only allocated in dynamic memory while training.
model->n_iter = NULL;
/* in the case of precomputed kernels we do not use
dense_to_precomputed because we don't want the leading 0. As
indices start at 1 (not at 0) this will work */
model->l = (int) SV_indptr_dims[0] - 1;
model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices,
(int *) SV_intptr, model->l);
model->nr_class = nr_class;
model->param = *param;
/*
 * regression and one-class do not use nSV, label.
*/
if (param->svm_type < 2) {
memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
for(i=0; i < model->nr_class; i++)
model->label[i] = i;
}
for (i=0; i < model->nr_class-1; i++) {
/*
 * We cannot squash all these mallocs in a single call since
* svm_destroy_model will free each element of the array.
*/
if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) {
int j;
for (j=0; j<i; j++)
free(model->sv_coef[j]);
goto sv_coef_i_error;
}
memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double));
dsv_coef += model->l;
}
for (i=0; i<m; ++i) {
(model->rho)[i] = -((double *) rho)[i];
}
/*
* just to avoid segfaults, these features are not wrapped but
* svm_destroy_model will try to free them.
*/
if (param->probability) {
if ((model->probA = malloc(m * sizeof(double))) == NULL)
goto probA_error;
memcpy(model->probA, probA, m * sizeof(double));
if ((model->probB = malloc(m * sizeof(double))) == NULL)
goto probB_error;
memcpy(model->probB, probB, m * sizeof(double));
} else {
model->probA = NULL;
model->probB = NULL;
}
/* We'll free SV ourselves */
model->free_sv = 0;
return model;
probB_error:
free(model->probA);
probA_error:
for (i=0; i < model->nr_class-1; i++)
free(model->sv_coef[i]);
sv_coef_i_error:
free(model->rho);
rho_error:
free(model->sv_coef);
sv_coef_error:
free(model->label);
label_error:
free(model->nSV);
nsv_error:
free(model);
model_error:
return NULL;
}
/*
* Copy support vectors into a scipy.sparse.csr matrix
*/
int csr_copy_SV (char *data, Py_ssize_t *n_indices,
char *indices, Py_ssize_t *n_indptr, char *indptr,
struct svm_csr_model *model, int n_features)
{
int i, j, k=0, index;
double *dvalues = (double *) data;
int *iindices = (int *) indices;
int *iindptr = (int *) indptr;
iindptr[0] = 0;
for (i=0; i<model->l; ++i) { /* iterate over support vectors */
index = model->SV[i][0].index;
for(j=0; index >=0 ; ++j) {
iindices[k] = index - 1;
dvalues[k] = model->SV[i][j].value;
index = model->SV[i][j+1].index;
++k;
}
iindptr[i+1] = k;
}
return 0;
}
/* get number of nonzero coefficients in support vectors */
Py_ssize_t get_nonzero_SV (struct svm_csr_model *model) {
int i, j;
Py_ssize_t count=0;
for (i=0; i<model->l; ++i) {
j = 0;
while (model->SV[i][j].index != -1) {
++j;
++count;
}
}
return count;
}
/*
* Predict using a model, where data is expected to be encoded into a csr matrix.
*/
int csr_copy_predict (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
double *t = (double *) dec_values;
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
*t = svm_csr_predict(model, predict_nodes[i], blas_functions);
free(predict_nodes[i]);
++t;
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_values (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, int nr_class, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_values(model, predict_nodes[i],
((double *) dec_values) + i*nr_class,
blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_proba (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
Py_ssize_t i;
int m = model->nr_class;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_probability(
model, predict_nodes[i], ((double *) dec_values) + i*m, blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
Py_ssize_t get_nr(struct svm_csr_model *model)
{
return (Py_ssize_t) model->nr_class;
}
void copy_intercept(char *data, struct svm_csr_model *model, Py_ssize_t *dims)
{
/* intercept = -rho */
Py_ssize_t i, n = dims[0];
double t, *ddata = (double *) data;
for (i=0; i<n; ++i) {
t = model->rho[i];
/* we do this to avoid ugly -0.0 */
*ddata = (t != 0) ? -t : 0;
++ddata;
}
}
void copy_support (char *data, struct svm_csr_model *model)
{
memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
 * Some helpers to convert from libsvm sparse data structures.
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_csr_model *model)
{
int i, len = model->nr_class-1;
double *temp = (double *) data;
for(i=0; i<len; ++i) {
memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
temp += model->l;
}
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_csr_model *model)
{
const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
* Get the number of support vectors in a model.
*/
Py_ssize_t get_l(struct svm_csr_model *model)
{
return (Py_ssize_t) model->l;
}
void copy_nSV(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
/*
* same as above with model->label
* TODO: merge in the cython layer
*/
void copy_label(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->label, model->nr_class * sizeof(int));
}
void copy_probA(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probA, dims[0] * sizeof(double));
}
void copy_probB(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
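/*
 * A plausible teardown sequence (an assumption drawn from the comments in
 * the routines below, not a verified call graph):
 *
 *   free_model_SV(model);  // releases the SV[i] and sv_coef[i] buffers
 *   free_model(model);     // releases the arrays and the struct itself
 *   free_param(param);
 */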
int free_problem(struct svm_csr_problem *problem)
{
int i;
if (problem == NULL) return -1;
for (i=0; i<problem->l; ++i)
free (problem->x[i]);
free (problem->x);
free (problem);
return 0;
}
int free_model(struct svm_csr_model *model)
{
/* like svm_free_and_destroy_model, but does not free sv_coef[i] */
/* We don't free n_iter, since we did not create them in set_model. */
if (model == NULL) return -1;
free(model->SV);
free(model->sv_coef);
free(model->rho);
free(model->label);
free(model->probA);
free(model->probB);
free(model->nSV);
free(model);
return 0;
}
int free_param(struct svm_parameter *param)
{
if (param == NULL) return -1;
free(param);
return 0;
}
int free_model_SV(struct svm_csr_model *model)
{
int i;
for (i=model->l-1; i>=0; --i) free(model->SV[i]);
	/* svm_destroy_model frees model->SV */
	for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]);
	/* svm_destroy_model frees model->sv_coef */
return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s,stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
svm_set_print_string_function(&print_string_stdout);
else
svm_set_print_string_function(&print_null);
}
@@ -0,0 +1,8 @@
/* This is a hack to generate libsvm with both sparse and dense
   methods in the same binary */
#define _DENSE_REP
#include "svm.cpp"
#undef _DENSE_REP
#include "svm.cpp"
File diff suppressed because it is too large
@@ -0,0 +1,176 @@
#ifndef _LIBSVM_H
#define _LIBSVM_H
#define LIBSVM_VERSION 310
#ifdef __cplusplus
extern "C" {
#endif
#include "_svm_cython_blas_helpers.h"
struct svm_node
{
int dim;
int ind; /* index. A bit redundant, but needed if using a
precomputed kernel */
double *values;
};
struct svm_problem
{
int l;
double *y;
struct svm_node *x;
double *W; /* instance weights */
};
struct svm_csr_node
{
int index;
double value;
};
struct svm_csr_problem
{
int l;
double *y;
struct svm_csr_node **x;
double *W; /* instance weights */
};
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
struct svm_parameter
{
int svm_type;
int kernel_type;
int degree; /* for poly */
double gamma; /* for poly/rbf/sigmoid */
double coef0; /* for poly/sigmoid */
/* these are for training only */
double cache_size; /* in MB */
double eps; /* stopping criteria */
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
int nr_weight; /* for C_SVC */
int *weight_label; /* for C_SVC */
double* weight; /* for C_SVC */
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
double p; /* for EPSILON_SVR */
int shrinking; /* use the shrinking heuristics */
int probability; /* do probability estimates */
int max_iter; /* ceiling on Solver runtime */
int random_seed; /* seed for random number generator */
};
//
// svm_model
//
struct svm_model
{
struct svm_parameter param; /* parameter */
int nr_class; /* number of classes, = 2 in regression/one class svm */
int l; /* total #SV */
struct svm_node *SV; /* SVs (SV[l]) */
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
int *n_iter; /* number of iterations run by the optimization routine to fit the model */
int *sv_ind; /* index of support vectors */
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
double *probA; /* pairwise probability information */
double *probB;
/* for classification only */
int *label; /* label of each class (label[k]) */
int *nSV; /* number of SVs for each class (nSV[k]) */
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
/* XXX */
int free_sv; /* 1 if svm_model is created by svm_load_model*/
/* 0 if svm_model is created by svm_train */
};
struct svm_csr_model
{
struct svm_parameter param; /* parameter */
int nr_class; /* number of classes, = 2 in regression/one class svm */
int l; /* total #SV */
struct svm_csr_node **SV; /* SVs (SV[l]) */
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
int *n_iter; /* number of iterations run by the optimization routine to fit the model */
int *sv_ind; /* index of support vectors */
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
double *probA; /* pairwise probability information */
double *probB;
/* for classification only */
int *label; /* label of each class (label[k]) */
int *nSV; /* number of SVs for each class (nSV[k]) */
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
/* XXX */
int free_sv; /* 1 if svm_model is created by svm_load_model*/
/* 0 if svm_model is created by svm_train */
};
/* svm_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);
int svm_save_model(const char *model_file_name, const struct svm_model *model);
struct svm_model *svm_load_model(const char *model_file_name);
int svm_get_svm_type(const struct svm_model *model);
int svm_get_nr_class(const struct svm_model *model);
void svm_get_labels(const struct svm_model *model, int *label);
double svm_get_svr_probability(const struct svm_model *model);
double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_predict(const struct svm_model *model, const struct svm_node *x, BlasFunctions *blas_functions);
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates, BlasFunctions *blas_functions);
void svm_free_model_content(struct svm_model *model_ptr);
void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
void svm_destroy_param(struct svm_parameter *param);
const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
void svm_set_print_string_function(void (*print_func)(const char *));
/* sparse version */
/* svm_csr_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */
struct svm_csr_model *svm_csr_train(const struct svm_csr_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_csr_cross_validation(const struct svm_csr_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);
int svm_csr_get_svm_type(const struct svm_csr_model *model);
int svm_csr_get_nr_class(const struct svm_csr_model *model);
void svm_csr_get_labels(const struct svm_csr_model *model, int *label);
double svm_csr_get_svr_probability(const struct svm_csr_model *model);
double svm_csr_predict_values(const struct svm_csr_model *model, const struct svm_csr_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_csr_predict(const struct svm_csr_model *model, const struct svm_csr_node *x, BlasFunctions *blas_functions);
double svm_csr_predict_probability(const struct svm_csr_model *model, const struct svm_csr_node *x, double* prob_estimates, BlasFunctions *blas_functions);
void svm_csr_free_model_content(struct svm_csr_model *model_ptr);
void svm_csr_free_and_destroy_model(struct svm_csr_model **model_ptr_ptr);
void svm_csr_destroy_param(struct svm_parameter *param);
const char *svm_csr_check_parameter(const struct svm_csr_problem *prob, const struct svm_parameter *param);
/* end sparse version */
#ifdef __cplusplus
}
#endif
#endif /* _LIBSVM_H */
@@ -0,0 +1,59 @@
/*
Creation, 2020:
- New random number generator using a Mersenne Twister + tweaked Lemire
postprocessor. This fixed a convergence issue on Windows targets for
libsvm and liblinear.
Sylvain Marie, Schneider Electric
See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
*/
#ifndef _NEWRAND_H
#define _NEWRAND_H
#ifdef __cplusplus
#include <random>  // needed for cython to generate a .cpp file from newrand.h
#include <cstdint> // for the fixed-width uint32_t / uint64_t used below
extern "C" {
#endif
// Scikit-Learn-specific random number generator replacing `rand()` originally
// used in LibSVM / LibLinear, to ensure the same behaviour on Windows and
// Linux, with increased speed
// - (1) Init a `mt_rand` object
std::mt19937 mt_rand(std::mt19937::default_seed);
// - (2) public `set_seed()` function that should be used instead of `srand()` to set a new seed.
void set_seed(unsigned custom_seed) {
mt_rand.seed(custom_seed);
}
// - (3) New internal `bounded_rand_int` function, used instead of rand() everywhere.
inline uint32_t bounded_rand_int(uint32_t range) {
// "LibSVM / LibLinear Original way" - make a 31bit positive
// random number and use modulo to make it fit in the range
// return abs( (int)mt_rand()) % range;
// "Better way": tweaked Lemire post-processor
// from http://www.pcg-random.org/posts/bounded-rands.html
uint32_t x = mt_rand();
uint64_t m = uint64_t(x) * uint64_t(range);
uint32_t l = uint32_t(m);
if (l < range) {
uint32_t t = -range;
if (t >= range) {
t -= range;
if (t >= range)
t %= range;
}
while (l < t) {
x = mt_rand();
m = uint64_t(x) * uint64_t(range);
l = uint32_t(m);
}
}
return m >> 32;
}
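// Usage sketch (illustrative): seed once, then draw bounded ints.
//   set_seed(42);
//   uint32_t r = bounded_rand_int(100);  // uniform over [0, 100)
// The rejection loop above discards the few draws of x that would bias
// the low end of the range, so the result is exactly uniform.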
#ifdef __cplusplus
}
#endif
#endif /* _NEWRAND_H */
@@ -0,0 +1,142 @@
import numpy as np
import pytest
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.svm._bounds import l1_min_c
from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap
from sklearn.utils.fixes import CSR_CONTAINERS
dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]]
Y1 = [0, 1, 1, 1]
Y2 = [2, 1, 0, 0]
@pytest.mark.parametrize("X_container", CSR_CONTAINERS + [np.array])
@pytest.mark.parametrize("loss", ["squared_hinge", "log"])
@pytest.mark.parametrize("Y_label", ["two-classes", "multi-class"])
@pytest.mark.parametrize("intercept_label", ["no-intercept", "fit-intercept"])
def test_l1_min_c(X_container, loss, Y_label, intercept_label):
Ys = {"two-classes": Y1, "multi-class": Y2}
intercepts = {
"no-intercept": {"fit_intercept": False},
"fit-intercept": {"fit_intercept": True, "intercept_scaling": 10},
}
X = X_container(dense_X)
Y = Ys[Y_label]
intercept_params = intercepts[intercept_label]
check_l1_min_c(X, Y, loss, **intercept_params)
def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=1.0):
min_c = l1_min_c(
X,
y,
loss=loss,
fit_intercept=fit_intercept,
intercept_scaling=intercept_scaling,
)
clf = {
"log": LogisticRegression(penalty="l1", solver="liblinear"),
"squared_hinge": LinearSVC(loss="squared_hinge", penalty="l1", dual=False),
}[loss]
clf.fit_intercept = fit_intercept
clf.intercept_scaling = intercept_scaling
clf.C = min_c
clf.fit(X, y)
assert (np.asarray(clf.coef_) == 0).all()
assert (np.asarray(clf.intercept_) == 0).all()
clf.C = min_c * 1.01
clf.fit(X, y)
assert (np.asarray(clf.coef_) != 0).any() or (np.asarray(clf.intercept_) != 0).any()
def test_ill_posed_min_c():
X = [[0, 0], [0, 0]]
y = [0, 1]
with pytest.raises(ValueError):
l1_min_c(X, y)
_MAX_UNSIGNED_INT = 4294967295
def test_newrand_default():
"""Test that bounded_rand_int_wrap without seeding respects the range
Note this test should pass either if executed alone, or in conjunctions
with other tests that call set_seed explicit in any order: it checks
invariants on the RNG instead of specific values.
"""
generated = [bounded_rand_int_wrap(100) for _ in range(10)]
assert all(0 <= x < 100 for x in generated)
assert not all(x == generated[0] for x in generated)
@pytest.mark.parametrize("seed, expected", [(0, 54), (_MAX_UNSIGNED_INT, 9)])
def test_newrand_set_seed(seed, expected):
"""Test that `set_seed` produces deterministic results"""
set_seed_wrap(seed)
generated = bounded_rand_int_wrap(100)
assert generated == expected
@pytest.mark.parametrize("seed", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_set_seed_overflow(seed):
"""Test that `set_seed_wrap` is defined for unsigned 32bits ints"""
with pytest.raises(OverflowError):
set_seed_wrap(seed)
@pytest.mark.parametrize("range_, n_pts", [(_MAX_UNSIGNED_INT, 10000), (100, 25)])
def test_newrand_bounded_rand_int(range_, n_pts):
"""Test that `bounded_rand_int` follows a uniform distribution"""
# XXX: this test is very seed sensitive: either it is wrong (too strict?)
# or the wrapped RNG is not uniform enough, at least on some platforms.
set_seed_wrap(42)
n_iter = 100
ks_pvals = []
uniform_dist = stats.uniform(loc=0, scale=range_)
    # perform multiple samplings to make the chance of outlier sampling negligible
for _ in range(n_iter):
# Deterministic random sampling
sample = [bounded_rand_int_wrap(range_) for _ in range(n_pts)]
res = stats.kstest(sample, uniform_dist.cdf)
ks_pvals.append(res.pvalue)
    # Null hypothesis = samples come from a uniform distribution.
# Under the null hypothesis, p-values should be uniformly distributed
# and not concentrated on low values
# (this may seem counter-intuitive but is backed by multiple refs)
# So we can do two checks:
# (1) check uniformity of p-values
uniform_p_vals_dist = stats.uniform(loc=0, scale=1)
res_pvals = stats.kstest(ks_pvals, uniform_p_vals_dist.cdf)
assert res_pvals.pvalue > 0.05, (
"Null hypothesis rejected: generated random numbers are not uniform."
" Details: the (meta) p-value of the test of uniform distribution"
f" of p-values is {res_pvals.pvalue} which is not > 0.05"
)
# (2) (safety belt) check that 90% of p-values are above 0.05
min_10pct_pval = np.percentile(ks_pvals, q=10)
# lower 10th quantile pvalue <= 0.05 means that the test rejects the
# null hypothesis that the sample came from the uniform distribution
assert min_10pct_pval > 0.05, (
"Null hypothesis rejected: generated random numbers are not uniform. "
f"Details: lower 10th quantile p-value of {min_10pct_pval} not > 0.05."
)
@pytest.mark.parametrize("range_", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_bounded_rand_int_limits(range_):
"""Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints"""
with pytest.raises(OverflowError):
bounded_rand_int_wrap(range_)
@@ -0,0 +1,493 @@
import numpy as np
import pytest
from scipy import sparse
from sklearn import base, datasets, linear_model, svm
from sklearn.datasets import load_digits, make_blobs, make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.svm.tests import test_svm
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
skip_if_32bit,
)
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils.fixes import (
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
# test sample 1
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])
true_result = [1, 2, 2]
# test sample 2
X2 = np.array(
[
[0, 0, 0],
[1, 1, 1],
[2, 0, 0],
[0, 0, 2],
[3, 3, 3],
]
)
Y2 = [1, 2, 2, 2, 3]
T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
true_result2 = [1, 2, 3]
iris = datasets.load_iris()
rng = np.random.RandomState(0)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
def check_svm_model_equal(dense_svm, X_train, y_train, X_test):
    # Use the original svm model for the dense fit and clone an identical
    # svm model for the sparse fit
sparse_svm = base.clone(dense_svm)
dense_svm.fit(X_train.toarray(), y_train)
if sparse.issparse(X_test):
X_test_dense = X_test.toarray()
else:
X_test_dense = X_test
sparse_svm.fit(X_train, y_train)
assert sparse.issparse(sparse_svm.support_vectors_)
assert sparse.issparse(sparse_svm.dual_coef_)
assert_allclose(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray())
assert_allclose(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray())
if dense_svm.kernel == "linear":
assert sparse.issparse(sparse_svm.coef_)
assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
assert_allclose(dense_svm.support_, sparse_svm.support_)
assert_allclose(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test))
assert_array_almost_equal(
dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)
)
assert_array_almost_equal(
dense_svm.decision_function(X_test_dense),
sparse_svm.decision_function(X_test_dense),
)
if isinstance(dense_svm, svm.OneClassSVM):
msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
else:
assert_array_almost_equal(
dense_svm.predict_proba(X_test_dense),
sparse_svm.predict_proba(X_test),
decimal=4,
)
msg = "cannot use sparse input in 'SVC' trained on dense data"
if sparse.issparse(X_test):
with pytest.raises(ValueError, match=msg):
dense_svm.predict(X_test)
@skip_if_32bit
@pytest.mark.parametrize(
"X_train, y_train, X_test",
[
[X, Y, T],
[X2, Y2, T2],
[X_blobs[:80], y_blobs[:80], X_blobs[80:]],
[iris.data, iris.target, iris.data],
],
)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS)
def test_svc(X_train, y_train, X_test, kernel, sparse_container):
"""Check that sparse SVC gives the same result as SVC."""
X_train = sparse_container(X_train)
clf = svm.SVC(
gamma=1,
kernel=kernel,
probability=True,
random_state=0,
decision_function_shape="ovo",
)
check_svm_model_equal(clf, X_train, y_train, X_test)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_unsorted_indices(csr_container):
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits, as iris, blobs and make_classification didn't
    # expose the problem
X, y = load_digits(return_X_y=True)
X_test = csr_container(X[50:100])
X, y = X[:50], y[:50]
X_sparse = csr_container(X)
coef_dense = (
svm.SVC(kernel="linear", probability=True, random_state=0).fit(X, y).coef_
)
sparse_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit(
X_sparse, y
)
coef_sorted = sparse_svc.coef_
# make sure dense and sparse SVM give the same result
assert_allclose(coef_dense, coef_sorted.toarray())
# reverse each row's indices
def scramble_indices(X):
new_data = []
new_indices = []
for i in range(1, len(X.indptr)):
row_slice = slice(*X.indptr[i - 1 : i + 1])
new_data.extend(X.data[row_slice][::-1])
new_indices.extend(X.indices[row_slice][::-1])
return csr_container((new_data, new_indices, X.indptr), shape=X.shape)
X_sparse_unsorted = scramble_indices(X_sparse)
X_test_unsorted = scramble_indices(X_test)
assert not X_sparse_unsorted.has_sorted_indices
assert not X_test_unsorted.has_sorted_indices
unsorted_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit(
X_sparse_unsorted, y
)
coef_unsorted = unsorted_svc.coef_
# make sure unsorted indices give same result
assert_allclose(coef_unsorted.toarray(), coef_sorted.toarray())
assert_allclose(
sparse_svc.predict_proba(X_test_unsorted), sparse_svc.predict_proba(X_test)
)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_svc_with_custom_kernel(lil_container):
def kfunc(x, y):
return safe_sparse_dot(x, y.T)
X_sp = lil_container(X)
clf_lin = svm.SVC(kernel="linear").fit(X_sp, Y)
clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y)
assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
@skip_if_32bit
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"])
def test_svc_iris(csr_container, kernel):
# Test the sparse SVC with the iris dataset
iris_data_sp = csr_container(iris.data)
sp_clf = svm.SVC(kernel=kernel).fit(iris_data_sp, iris.target)
clf = svm.SVC(kernel=kernel).fit(iris.data, iris.target)
assert_allclose(clf.support_vectors_, sp_clf.support_vectors_.toarray())
assert_allclose(clf.dual_coef_, sp_clf.dual_coef_.toarray())
assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp))
if kernel == "linear":
assert_allclose(clf.coef_, sp_clf.coef_.toarray())
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse_decision_function(csr_container):
# Test decision_function
    # Sanity check: test that the decision_function implemented in Python
    # returns the same values as the one in libsvm
# multi class:
iris_data_sp = csr_container(iris.data)
svc = svm.SVC(kernel="linear", C=0.1, decision_function_shape="ovo")
clf = svc.fit(iris_data_sp, iris.target)
dec = safe_sparse_dot(iris_data_sp, clf.coef_.T) + clf.intercept_
assert_allclose(dec, clf.decision_function(iris_data_sp))
# binary:
clf.fit(X, Y)
dec = np.dot(X, clf.coef_.T) + clf.intercept_
prediction = clf.predict(X)
assert_allclose(dec.ravel(), clf.decision_function(X))
assert_allclose(
prediction, clf.classes_[(clf.decision_function(X) > 0).astype(int).ravel()]
)
expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0])
assert_array_almost_equal(clf.decision_function(X), expected, decimal=2)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_error(lil_container):
# Test that it gives proper exception on deficient input
clf = svm.SVC()
X_sp = lil_container(X)
Y2 = Y[:-1] # wrong dimensions for labels
with pytest.raises(ValueError):
clf.fit(X_sp, Y2)
clf.fit(X_sp, Y)
assert_array_equal(clf.predict(T), true_result)
@pytest.mark.parametrize(
"lil_container, dok_container", zip(LIL_CONTAINERS, DOK_CONTAINERS)
)
def test_linearsvc(lil_container, dok_container):
# Similar to test_SVC
X_sp = lil_container(X)
X2_sp = dok_container(X2)
clf = svm.LinearSVC(random_state=0).fit(X, Y)
sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y)
assert sp_clf.fit_intercept
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)
assert_allclose(clf.predict(X), sp_clf.predict(X_sp))
clf.fit(X2, Y2)
sp_clf.fit(X2_sp, Y2)
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_linearsvc_iris(csr_container):
# Test the sparse LinearSVC with the iris dataset
iris_data_sp = csr_container(iris.data)
sp_clf = svm.LinearSVC(random_state=0).fit(iris_data_sp, iris.target)
clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
assert clf.fit_intercept == sp_clf.fit_intercept
assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1)
assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1)
assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp))
# check decision_function
pred = np.argmax(sp_clf.decision_function(iris_data_sp), axis=1)
assert_allclose(pred, clf.predict(iris.data))
# sparsify the coefficients on both models and check that they still
# produce the same results
clf.sparsify()
assert_array_equal(pred, clf.predict(iris_data_sp))
sp_clf.sparsify()
assert_array_equal(pred, sp_clf.predict(iris_data_sp))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_weight(csr_container):
# Test class weights
X_, y_ = make_classification(
n_samples=200, n_features=100, weights=[0.833, 0.167], random_state=0
)
X_ = csr_container(X_)
for clf in (
linear_model.LogisticRegression(),
svm.LinearSVC(random_state=0),
svm.SVC(),
):
clf.set_params(class_weight={0: 5})
clf.fit(X_[:180], y_[:180])
y_pred = clf.predict(X_[180:])
assert np.sum(y_pred == y_[180:]) >= 11
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_sample_weights(lil_container):
# Test weights on individual samples
X_sp = lil_container(X)
clf = svm.SVC()
clf.fit(X_sp, Y)
assert_array_equal(clf.predict([X[2]]), [1.0])
sample_weight = [0.1] * 3 + [10] * 3
clf.fit(X_sp, Y, sample_weight=sample_weight)
assert_array_equal(clf.predict([X[2]]), [2.0])
def test_sparse_liblinear_intercept_handling():
# Test that sparse liblinear honours intercept_scaling param
test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC)
@pytest.mark.parametrize(
"X_train, y_train, X_test",
[
[X, None, T],
[X2, None, T2],
[X_blobs[:80], None, X_blobs[80:]],
[iris.data, None, iris.data],
],
)
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS)
@skip_if_32bit
def test_sparse_oneclasssvm(X_train, y_train, X_test, kernel, sparse_container):
# Check that sparse OneClassSVM gives the same result as dense OneClassSVM
X_train = sparse_container(X_train)
clf = svm.OneClassSVM(gamma=1, kernel=kernel)
check_svm_model_equal(clf, X_train, y_train, X_test)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse_realdata(csr_container):
# Test on a subset from the 20newsgroups dataset.
# This catches some bugs if input is not correctly converted into
# sparse format or weights are not correctly initialized.
data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
# SVC does not support large sparse, so we specify int32 indices
# In this case, `csr_matrix` automatically uses int32 regardless of the dtypes of
# `indices` and `indptr` but `csr_array` may or may not use the same dtype as
# `indices` and `indptr`, which would be int64 if not specified
indices = np.array([6, 5, 35, 31], dtype=np.int32)
indptr = np.array([0] * 8 + [1] * 32 + [2] * 38 + [4] * 3, dtype=np.int32)
X = csr_container((data, indices, indptr))
    y = np.array(
        [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0,
         3.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0,
         2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0,
         2.0, 2.0, 3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0,
         2.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0]
    )
clf = svm.SVC(kernel="linear").fit(X.toarray(), y)
sp_clf = svm.SVC(kernel="linear").fit(X.tocoo(), y)
assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_sparse_svc_clone_with_callable_kernel(lil_container):
# Test that the "dense_fit" is called even though we use sparse input
# meaning that everything works fine.
a = svm.SVC(C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0)
b = base.clone(a)
X_sp = lil_container(X)
b.fit(X_sp, Y)
pred = b.predict(X_sp)
b.predict_proba(X_sp)
dense_svm = svm.SVC(
C=1, kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0
)
pred_dense = dense_svm.fit(X, Y).predict(X)
assert_array_equal(pred_dense, pred)
# b.decision_function(X_sp) # XXX : should be supported
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_timeout(lil_container):
sp = svm.SVC(
C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0, max_iter=1
)
warning_msg = (
r"Solver terminated early \(max_iter=1\). Consider pre-processing "
r"your data with StandardScaler or MinMaxScaler."
)
with pytest.warns(ConvergenceWarning, match=warning_msg):
sp.fit(lil_container(X), Y)
def test_consistent_proba():
a = svm.SVC(probability=True, max_iter=1, random_state=0)
with ignore_warnings(category=ConvergenceWarning):
proba_1 = a.fit(X, Y).predict_proba(X)
a = svm.SVC(probability=True, max_iter=1, random_state=0)
with ignore_warnings(category=ConvergenceWarning):
proba_2 = a.fit(X, Y).predict_proba(X)
assert_allclose(proba_1, proba_2)
File diff suppressed because it is too large