"""
Binding for libsvm_skl
----------------------

These are the bindings for libsvm_skl, which is a fork of libsvm[1]
that adds to libsvm some capabilities, like index of support vectors
and efficient representation of dense matrices.

These are low-level routines, but can be used for flexibility or
performance reasons. See sklearn.svm for a higher-level API.

Low-level memory management is done in libsvm_helper.c. If we happen
to run out of memory a MemoryError will be raised. In practice this is
of limited help, because malloc is most likely to fail inside svm.cpp,
where no memory checks are performed.

[1] https://www.csie.ntu.edu.tw/~cjlin/libsvm/

Notes
-----
The signature mode='c' is somewhat superficial, since we already
check that arrays are C-contiguous in svm.py

Authors
-------
2010: Fabian Pedregosa <[email protected]>
      Gael Varoquaux <[email protected]>
"""
|
|
|
import numpy as np |
|
from libc.stdlib cimport free |
|
from ..utils._cython_blas cimport _dot |
|
from ..utils._typedefs cimport float64_t, int32_t, intp_t |
|
|
|
include "_libsvm.pxi" |
|
|
|
# Declare svm_parameter as an externally defined C struct; no fields are
# exposed to Cython here, the struct is only passed around by pointer.
cdef extern from *:
    ctypedef struct svm_parameter:
        pass
|
|
|
|
|
|
|
|
|
# Kernel names accepted by the functions below. The ORDER matters: the
# position of a name in this list is the integer kernel index handed to
# set_problem / set_parameter (e.g. index 4 is the precomputed kernel).
LIBSVM_KERNEL_TYPES = [
    "linear",
    "poly",
    "rbf",
    "sigmoid",
    "precomputed",
]
|
|
|
|
|
|
|
|
|
|
|
def fit(
    const float64_t[:, ::1] X,
    const float64_t[::1] Y,
    int svm_type=0,
    kernel='rbf',
    int degree=3,
    double gamma=0.1,
    double coef0=0.0,
    double tol=1e-3,
    double C=1.0,
    double nu=0.5,
    double epsilon=0.1,
    const float64_t[::1] class_weight=np.empty(0),
    const float64_t[::1] sample_weight=np.empty(0),
    int shrinking=1,
    int probability=0,
    double cache_size=100.,
    int max_iter=-1,
    int random_seed=0,
):
    """
    Train the model using libsvm (low-level method)

    Parameters
    ----------
    X : array-like, dtype=float64 of shape (n_samples, n_features)
        Training data handed to libsvm.

    Y : array, dtype=float64 of shape (n_samples,)
        target vector

    svm_type : {0, 1, 2, 3, 4}, default=0
        Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
        respectively.

    kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
        Kernel to use in the model: linear, polynomial, RBF, sigmoid
        or precomputed.

    degree : int32, default=3
        Degree of the polynomial kernel (only relevant if kernel is
        set to polynomial).

    gamma : float64, default=0.1
        Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
        kernels.

    coef0 : float64, default=0
        Independent parameter in poly/sigmoid kernel.

    tol : float64, default=1e-3
        Numeric stopping criterion (tolerance forwarded to the libsvm
        solver).

    C : float64, default=1
        C parameter in C-Support Vector Classification.

    nu : float64, default=0.5
        An upper bound on the fraction of training errors and a lower bound of
        the fraction of support vectors. Should be in the interval (0, 1].

    epsilon : double, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function.

    class_weight : array, dtype=float64, shape (n_classes,), \
            default=np.empty(0)
        Set the parameter C of class i to class_weight[i]*C for
        SVC. If not given, all classes are supposed to have
        weight one.

    sample_weight : array, dtype=float64, shape (n_samples,), \
            default=np.empty(0)
        Weights assigned to each sample. An empty array means a weight of
        one for every sample.

    shrinking : int, default=1
        Whether to use the shrinking heuristic.

    probability : int, default=0
        Whether to enable probability estimates.

    cache_size : float64, default=100
        Cache size for gram matrix columns (in megabytes).

    max_iter : int (-1 for no limit), default=-1
        Stop solver after this many iterations regardless of accuracy
        (XXX Currently there is no API to know whether this kicked in.)

    random_seed : int, default=0
        Seed for the random number generator used for probability estimates.

    Returns
    -------
    support : array of shape (n_support,)
        Index of support vectors.

    support_vectors : array of shape (n_support, n_features)
        Support vectors (equivalent to X[support]). Will return an
        empty array in the case of precomputed kernel.

    n_class_SV : array of shape (n_class,)
        Number of support vectors in each class. For svm_type other than
        0 and 1 this is ``[n_support, n_support]``.

    sv_coef : array of shape (n_class-1, n_support)
        Coefficients of support vectors in decision function.

    intercept : array of shape (n_class*(n_class-1)/2,)
        Intercept in decision function.

    probA, probB : array of shape (n_class*(n_class-1)/2,)
        Probability estimates, empty array for probability=False.

    fit_status : int
        Status value written by ``svm_train``.

    n_iter : ndarray of shape (max(1, (n_class * (n_class - 1) // 2)),)
        Number of iterations run by the optimization routine to fit the model.

    Raises
    ------
    MemoryError
        If the libsvm problem could not be allocated.
    ValueError
        If ``kernel`` is not a known kernel name or libsvm rejects the
        parameters.
    """
    # All C-level declarations live at the top: Cython does not allow cdef
    # statements inside the try block used below for cleanup.
    cdef svm_parameter param
    cdef svm_problem problem
    cdef svm_model *model = NULL
    cdef const char *error_msg
    cdef intp_t SV_len
    cdef int fit_status = 0
    cdef BlasFunctions blas_functions
    cdef int32_t[::1] class_weight_label
    cdef int[::1] n_iter
    cdef float64_t[:, ::1] sv_coef
    cdef float64_t[::1] intercept
    cdef int32_t[::1] support
    cdef float64_t[:, ::1] support_vectors
    cdef int32_t[::1] n_class_SV
    cdef float64_t[::1] probA
    cdef float64_t[::1] probB

    if len(sample_weight) == 0:
        sample_weight = np.ones(X.shape[0], dtype=np.float64)
    else:
        assert sample_weight.shape[0] == X.shape[0], (
            f"sample_weight and X have incompatible shapes: sample_weight has "
            f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
        )

    # Everything that can raise without owning C memory is done before
    # set_problem allocates problem.x.
    kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
    class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
    blas_functions.dot = _dot[double]

    set_problem(
        &problem,
        <char*> &X[0, 0],
        <char*> &Y[0],
        <char*> &sample_weight[0],
        <intp_t*> X.shape,
        kernel_index,
    )
    if problem.x == NULL:
        raise MemoryError("Seems we've run out of memory")

    # From here on problem.x (and later the model) must be released on every
    # exit path, including invalid parameters and failed allocations.
    try:
        set_parameter(
            &param,
            svm_type,
            kernel_index,
            degree,
            gamma,
            coef0,
            nu,
            cache_size,
            C,
            tol,
            epsilon,
            shrinking,
            probability,
            <int> class_weight.shape[0],
            <char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
            <char*> &class_weight[0] if class_weight.size > 0 else NULL,
            max_iter,
            random_seed,
        )

        error_msg = svm_check_parameter(&problem, &param)
        if error_msg:
            # for SVR: epsilon is called p in libsvm
            error_repl = error_msg.decode('utf-8').replace("p < 0", "epsilon < 0")
            raise ValueError(error_repl)

        # this does the real work
        with nogil:
            model = svm_train(&problem, &param, &fit_status, &blas_functions)

        # from here until the end, we just copy the data returned by
        # svm_train
        SV_len = get_l(model)
        n_class = get_nr(model)
        n_pairs = n_class * (n_class - 1) // 2

        n_iter = np.empty(max(1, n_pairs), dtype=np.intc)
        copy_n_iter(<char*> &n_iter[0], model)

        sv_coef = np.empty((n_class - 1, SV_len), dtype=np.float64)
        copy_sv_coef(<char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model)

        # the intercept is just model.rho but with sign changed
        intercept = np.empty(n_pairs, dtype=np.float64)
        copy_intercept(<char*> &intercept[0], model, <intp_t*> intercept.shape)

        support = np.empty(SV_len, dtype=np.int32)
        copy_support(<char*> &support[0] if support.size > 0 else NULL, model)

        # copy model.SV
        if kernel_index == 4:
            # precomputed kernel
            support_vectors = np.empty((0, 0), dtype=np.float64)
        else:
            support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64)
            copy_SV(
                <char*> &support_vectors[0, 0] if support_vectors.size > 0 else NULL,
                model,
                <intp_t*> support_vectors.shape,
            )

        if svm_type == 0 or svm_type == 1:
            n_class_SV = np.empty(n_class, dtype=np.int32)
            copy_nSV(<char*> &n_class_SV[0] if n_class_SV.size > 0 else NULL, model)
        else:
            # OneClass and SVR are considered to have 2 classes
            n_class_SV = np.array([SV_len, SV_len], dtype=np.int32)

        if probability != 0:
            if svm_type < 2:  # SVC and NuSVC
                probA = np.empty(n_pairs, dtype=np.float64)
                probB = np.empty(n_pairs, dtype=np.float64)
                copy_probB(<char*> &probB[0], model, <intp_t*> probB.shape)
            else:
                probA = np.empty(1, dtype=np.float64)
                probB = np.empty(0, dtype=np.float64)
            copy_probA(<char*> &probA[0], model, <intp_t*> probA.shape)
        else:
            probA = np.empty(0, dtype=np.float64)
            probB = np.empty(0, dtype=np.float64)
    finally:
        # model is still NULL if we failed before training.
        if model != NULL:
            svm_free_and_destroy_model(&model)
        free(problem.x)

    return (
        support.base,
        support_vectors.base,
        n_class_SV.base,
        sv_coef.base,
        intercept.base,
        probA.base,
        probB.base,
        fit_status,
        n_iter.base,
    )
|
|
|
|
|
cdef void set_predict_params(
    svm_parameter *param,
    int svm_type,
    kernel,
    int degree,
    double gamma,
    double coef0,
    double cache_size,
    int probability,
    int nr_weight,
    char *weight_label,
    char *weight,
) except *:
    """Populate ``param`` for prediction.

    The caller supplies the model-related settings; the solver settings
    that only matter while training are filled with fixed placeholder
    values. ``kernel`` is translated to its position in
    ``LIBSVM_KERNEL_TYPES``; an unknown name makes ``list.index`` raise
    ``ValueError``, which propagates to the caller.
    """
    # Placeholders for the training-time only parameters.
    cdef:
        double train_C = 0.0
        double train_tol = 0.1
        double train_epsilon = 0.1
        double train_nu = 0.5
        int train_shrinking = 0
        int train_max_iter = 0
        int train_seed = -1

    kernel_code = LIBSVM_KERNEL_TYPES.index(kernel)

    set_parameter(
        param,
        svm_type,
        kernel_code,
        degree,
        gamma,
        coef0,
        train_nu,
        cache_size,
        train_C,
        train_tol,
        train_epsilon,
        train_shrinking,
        probability,
        nr_weight,
        weight_label,
        weight,
        train_max_iter,
        train_seed,
    )
|
|
|
|
|
def predict(
    const float64_t[:, ::1] X,
    const int32_t[::1] support,
    const float64_t[:, ::1] SV,
    const int32_t[::1] nSV,
    const float64_t[:, ::1] sv_coef,
    const float64_t[::1] intercept,
    const float64_t[::1] probA=np.empty(0),
    const float64_t[::1] probB=np.empty(0),
    int svm_type=0,
    kernel='rbf',
    int degree=3,
    double gamma=0.1,
    double coef0=0.0,
    const float64_t[::1] class_weight=np.empty(0),
    const float64_t[::1] sample_weight=np.empty(0),
    double cache_size=100.0,
):
    """
    Predict target values of X given a model (low-level method)

    Parameters
    ----------
    X : array-like, dtype=float of shape (n_samples, n_features)
        Samples to predict.

    support : array of shape (n_support,)
        Index of support vectors in training set.

    SV : array of shape (n_support, n_features)
        Support vectors.

    nSV : array of shape (n_class,)
        Number of support vectors in each class.

    sv_coef : array of shape (n_class-1, n_support)
        Coefficients of support vectors in decision function.

    intercept : array of shape (n_class*(n_class-1)/2)
        Intercept in decision function.

    probA, probB : array of shape (n_class*(n_class-1)/2,)
        Probability estimates.

    svm_type : {0, 1, 2, 3, 4}, default=0
        Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
        respectively.

    kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
        Kernel to use in the model: linear, polynomial, RBF, sigmoid
        or precomputed.

    degree : int32, default=3
        Degree of the polynomial kernel (only relevant if kernel is
        set to polynomial).

    gamma : float64, default=0.1
        Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
        kernels.

    coef0 : float64, default=0.0
        Independent parameter in poly/sigmoid kernel.

    class_weight : array, dtype=float64, shape (n_classes,), \
            default=np.empty(0)
        Per-class weights forwarded to the libsvm parameter struct.

    sample_weight : array, dtype=float64, default=np.empty(0)
        Accepted for signature compatibility; not read by this function.

    cache_size : float64, default=100
        Cache size forwarded to the libsvm parameter struct.

    Returns
    -------
    dec_values : array of shape (n_samples,)
        Predicted values.

    Raises
    ------
    MemoryError
        If the model cannot be built or the prediction routine reports an
        allocation failure.
    """
    cdef float64_t[::1] dec_values
    cdef svm_parameter param
    cdef svm_model *model
    cdef int rv
    cdef BlasFunctions blas_functions

    cdef int32_t[::1] class_weight_label = np.arange(
        class_weight.shape[0], dtype=np.int32
    )

    # probability flag is 0: plain prediction does not use probA/probB.
    set_predict_params(
        &param,
        svm_type,
        kernel,
        degree,
        gamma,
        coef0,
        cache_size,
        0,
        <int> class_weight.shape[0],
        <char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
        <char*> &class_weight[0] if class_weight.size > 0 else NULL,
    )
    model = set_model(
        &param,
        <int> nSV.shape[0],
        <char*> &SV[0, 0] if SV.size > 0 else NULL,
        <intp_t*> SV.shape,
        <char*> &support[0] if support.size > 0 else NULL,
        <intp_t*> support.shape,
        <intp_t*> sv_coef.strides,
        <char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL,
        <char*> &intercept[0],
        <char*> &nSV[0],
        <char*> &probA[0] if probA.size > 0 else NULL,
        <char*> &probB[0] if probB.size > 0 else NULL,
    )
    # NOTE(review): set_model presumably returns NULL when an allocation
    # fails in libsvm_helper.c -- confirm. Never dereference a NULL model.
    if model == NULL:
        raise MemoryError("We've run out of memory")

    blas_functions.dot = _dot[double]
    # TODO: use check_model
    try:
        dec_values = np.empty(X.shape[0])
        with nogil:
            rv = copy_predict(
                <char*> &X[0, 0],
                model,
                <intp_t*> X.shape,
                <char*> &dec_values[0],
                &blas_functions,
            )
        if rv < 0:
            raise MemoryError("We've run out of memory")
    finally:
        # The temporary model is always released, even on failure.
        free_model(model)

    return dec_values.base
|
|
|
|
|
def predict_proba(
    const float64_t[:, ::1] X,
    const int32_t[::1] support,
    const float64_t[:, ::1] SV,
    const int32_t[::1] nSV,
    const float64_t[:, ::1] sv_coef,
    const float64_t[::1] intercept,
    const float64_t[::1] probA=np.empty(0),
    const float64_t[::1] probB=np.empty(0),
    int svm_type=0,
    kernel='rbf',
    int degree=3,
    double gamma=0.1,
    double coef0=0.0,
    const float64_t[::1] class_weight=np.empty(0),
    const float64_t[::1] sample_weight=np.empty(0),
    double cache_size=100.0,
):
    """
    Predict probabilities

    svm_model stores all parameters needed to predict a given value.

    For speed, all real work is done at the C level in function
    copy_predict_proba (libsvm_helper.c).

    We have to reconstruct model and parameters to make sure we stay
    in sync with the python object.

    See sklearn.svm.predict for a complete list of parameters.

    Parameters
    ----------
    X : array-like, dtype=float of shape (n_samples, n_features)
        Samples to predict.

    support : array of shape (n_support,)
        Index of support vectors in training set.

    SV : array of shape (n_support, n_features)
        Support vectors.

    nSV : array of shape (n_class,)
        Number of support vectors in each class.

    sv_coef : array of shape (n_class-1, n_support)
        Coefficients of support vectors in decision function.

    intercept : array of shape (n_class*(n_class-1)/2,)
        Intercept in decision function.

    probA, probB : array of shape (n_class*(n_class-1)/2,)
        Probability estimates.

    svm_type : {0, 1, 2, 3, 4}, default=0
        Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
        respectively.

    kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
        Kernel to use in the model: linear, polynomial, RBF, sigmoid
        or precomputed.

    degree : int32, default=3
        Degree of the polynomial kernel (only relevant if kernel is
        set to polynomial).

    gamma : float64, default=0.1
        Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
        kernels.

    coef0 : float64, default=0.0
        Independent parameter in poly/sigmoid kernel.

    class_weight : array, dtype=float64, shape (n_classes,), \
            default=np.empty(0)
        Per-class weights forwarded to the libsvm parameter struct.

    sample_weight : array, dtype=float64, default=np.empty(0)
        Accepted for signature compatibility; not read by this function.

    cache_size : float64, default=100
        Cache size forwarded to the libsvm parameter struct.

    Returns
    -------
    dec_values : array of shape (n_samples, n_class)
        Values written by copy_predict_proba, one row per sample.

    Raises
    ------
    MemoryError
        If the model cannot be built or the prediction routine reports an
        allocation failure.
    """
    cdef float64_t[:, ::1] dec_values
    cdef svm_parameter param
    cdef svm_model *model
    cdef intp_t n_class
    cdef int rv
    cdef BlasFunctions blas_functions
    cdef int32_t[::1] class_weight_label = np.arange(
        class_weight.shape[0], dtype=np.int32
    )

    # probability flag is 1: the model must carry probA/probB.
    set_predict_params(
        &param,
        svm_type,
        kernel,
        degree,
        gamma,
        coef0,
        cache_size,
        1,
        <int> class_weight.shape[0],
        <char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
        <char*> &class_weight[0] if class_weight.size > 0 else NULL,
    )
    # Empty arrays are passed as NULL, exactly as predict() does, instead of
    # taking the address of a non-existent first element.
    model = set_model(
        &param,
        <int> nSV.shape[0],
        <char*> &SV[0, 0] if SV.size > 0 else NULL,
        <intp_t*> SV.shape,
        <char*> &support[0] if support.size > 0 else NULL,
        <intp_t*> support.shape,
        <intp_t*> sv_coef.strides,
        <char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL,
        <char*> &intercept[0],
        <char*> &nSV[0],
        <char*> &probA[0] if probA.size > 0 else NULL,
        <char*> &probB[0] if probB.size > 0 else NULL,
    )
    # NOTE(review): set_model presumably returns NULL when an allocation
    # fails in libsvm_helper.c -- confirm. get_nr() below would otherwise
    # dereference a NULL model.
    if model == NULL:
        raise MemoryError("We've run out of memory")

    n_class = get_nr(model)
    blas_functions.dot = _dot[double]
    try:
        dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
        with nogil:
            rv = copy_predict_proba(
                <char*> &X[0, 0],
                model,
                <intp_t*> X.shape,
                <char*> &dec_values[0, 0],
                &blas_functions,
            )
        if rv < 0:
            raise MemoryError("We've run out of memory")
    finally:
        # The temporary model is always released, even on failure.
        free_model(model)

    return dec_values.base
|
|
|
|
|
def decision_function(
    const float64_t[:, ::1] X,
    const int32_t[::1] support,
    const float64_t[:, ::1] SV,
    const int32_t[::1] nSV,
    const float64_t[:, ::1] sv_coef,
    const float64_t[::1] intercept,
    const float64_t[::1] probA=np.empty(0),
    const float64_t[::1] probB=np.empty(0),
    int svm_type=0,
    kernel='rbf',
    int degree=3,
    double gamma=0.1,
    double coef0=0.0,
    const float64_t[::1] class_weight=np.empty(0),
    const float64_t[::1] sample_weight=np.empty(0),
    double cache_size=100.0,
):
    """
    Predict margin (libsvm name for this is predict_values)

    We have to reconstruct model and parameters to make sure we stay
    in sync with the python object.

    Parameters
    ----------
    X : array-like, dtype=float of shape (n_samples, n_features)
        Samples to evaluate.

    support : array of shape (n_support,)
        Index of support vectors in training set.

    SV : array of shape (n_support, n_features)
        Support vectors.

    nSV : array of shape (n_class,)
        Number of support vectors in each class.

    sv_coef : array of shape (n_class-1, n_support)
        Coefficients of support vectors in decision function.

    intercept : array of shape (n_class*(n_class-1)/2,)
        Intercept in decision function.

    probA, probB : array of shape (n_class*(n_class-1)/2,)
        Probability estimates.

    svm_type : {0, 1, 2, 3, 4}, default=0
        Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
        respectively.

    kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf"
        Kernel to use in the model: linear, polynomial, RBF, sigmoid
        or precomputed.

    degree : int32, default=3
        Degree of the polynomial kernel (only relevant if kernel is
        set to polynomial).

    gamma : float64, default=0.1
        Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
        kernels.

    coef0 : float64, default=0.0
        Independent parameter in poly/sigmoid kernel.

    class_weight : array, dtype=float64, shape (n_classes,), \
            default=np.empty(0)
        Per-class weights forwarded to the libsvm parameter struct.

    sample_weight : array, dtype=float64, default=np.empty(0)
        Accepted for signature compatibility; not read by this function.

    cache_size : float64, default=100
        Cache size forwarded to the libsvm parameter struct.

    Returns
    -------
    dec_values : array of shape (n_samples, n_columns)
        Predicted values. ``n_columns`` is 1 when ``svm_type > 1`` and
        ``n_class * (n_class - 1) // 2`` otherwise.

    Raises
    ------
    MemoryError
        If the model cannot be built or the prediction routine reports an
        allocation failure.
    """
    cdef float64_t[:, ::1] dec_values
    cdef svm_parameter param
    cdef svm_model *model
    cdef intp_t n_class
    cdef int rv
    cdef BlasFunctions blas_functions

    cdef int32_t[::1] class_weight_label = np.arange(
        class_weight.shape[0], dtype=np.int32
    )

    # probability flag is 0: decision values do not use probA/probB.
    set_predict_params(
        &param,
        svm_type,
        kernel,
        degree,
        gamma,
        coef0,
        cache_size,
        0,
        <int> class_weight.shape[0],
        <char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
        <char*> &class_weight[0] if class_weight.size > 0 else NULL,
    )

    # Empty arrays are passed as NULL, exactly as predict() does, instead of
    # taking the address of a non-existent first element.
    model = set_model(
        &param,
        <int> nSV.shape[0],
        <char*> &SV[0, 0] if SV.size > 0 else NULL,
        <intp_t*> SV.shape,
        <char*> &support[0] if support.size > 0 else NULL,
        <intp_t*> support.shape,
        <intp_t*> sv_coef.strides,
        <char*> &sv_coef[0, 0] if sv_coef.size > 0 else NULL,
        <char*> &intercept[0],
        <char*> &nSV[0],
        <char*> &probA[0] if probA.size > 0 else NULL,
        <char*> &probB[0] if probB.size > 0 else NULL,
    )
    # NOTE(review): set_model presumably returns NULL when an allocation
    # fails in libsvm_helper.c -- confirm. get_nr() below would otherwise
    # dereference a NULL model.
    if model == NULL:
        raise MemoryError("We've run out of memory")

    # n_class ends up holding the number of output columns per sample.
    if svm_type > 1:
        n_class = 1
    else:
        n_class = get_nr(model)
        n_class = n_class * (n_class - 1) // 2

    blas_functions.dot = _dot[double]
    try:
        dec_values = np.empty((X.shape[0], n_class), dtype=np.float64)
        with nogil:
            rv = copy_predict_values(
                <char*> &X[0, 0],
                model,
                <intp_t*> X.shape,
                <char*> &dec_values[0, 0],
                n_class,
                &blas_functions,
            )
        if rv < 0:
            raise MemoryError("We've run out of memory")
    finally:
        # The temporary model is always released, even on failure.
        free_model(model)

    return dec_values.base
|
|
|
|
|
def cross_validation(
    const float64_t[:, ::1] X,
    const float64_t[::1] Y,
    int n_fold,
    int svm_type=0,
    kernel='rbf',
    int degree=3,
    double gamma=0.1,
    double coef0=0.0,
    double tol=1e-3,
    double C=1.0,
    double nu=0.5,
    double epsilon=0.1,
    const float64_t[::1] class_weight=np.empty(0),
    const float64_t[::1] sample_weight=np.empty(0),
    int shrinking=0,
    int probability=0,
    double cache_size=100.0,
    int max_iter=-1,
    int random_seed=0,
):
    """
    Binding of the cross-validation routine (low-level routine)

    Parameters
    ----------

    X : array-like, dtype=float of shape (n_samples, n_features)
        Training data handed to libsvm.

    Y : array, dtype=float of shape (n_samples,)
        target vector

    n_fold : int32
        Number of folds for cross validation.

    svm_type : {0, 1, 2, 3, 4}, default=0
        Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR
        respectively.

    kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default='rbf'
        Kernel to use in the model: linear, polynomial, RBF, sigmoid
        or precomputed.

    degree : int32, default=3
        Degree of the polynomial kernel (only relevant if kernel is
        set to polynomial).

    gamma : float64, default=0.1
        Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other
        kernels.

    coef0 : float64, default=0.0
        Independent parameter in poly/sigmoid kernel.

    tol : float64, default=1e-3
        Numeric stopping criterion (tolerance forwarded to the libsvm
        solver).

    C : float64, default=1
        C parameter in C-Support Vector Classification.

    nu : float64, default=0.5
        An upper bound on the fraction of training errors and a lower bound of
        the fraction of support vectors. Should be in the interval (0, 1].

    epsilon : double, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function.

    class_weight : array, dtype=float64, shape (n_classes,), \
            default=np.empty(0)
        Set the parameter C of class i to class_weight[i]*C for
        SVC. If not given, all classes are supposed to have
        weight one.

    sample_weight : array, dtype=float64, shape (n_samples,), \
            default=np.empty(0)
        Weights assigned to each sample. An empty array means a weight of
        one for every sample.

    shrinking : int, default=0
        Whether to use the shrinking heuristic.

    probability : int, default=0
        Whether to enable probability estimates.

    cache_size : float64, default=100
        Cache size for gram matrix columns (in megabytes).

    max_iter : int (-1 for no limit), default=-1
        Stop solver after this many iterations regardless of accuracy
        (XXX Currently there is no API to know whether this kicked in.)

    random_seed : int, default=0
        Seed for the random number generator used for probability estimates.

    Returns
    -------
    target : array, float64, of shape (n_samples,)
        Values written by svm_cross_validation, one per sample.

    Raises
    ------
    MemoryError
        If the libsvm problem could not be allocated.
    ValueError
        If there are fewer samples than folds, ``kernel`` is not a known
        kernel name, or libsvm rejects the parameters.
    """
    # All C-level declarations live at the top: Cython does not allow cdef
    # statements inside the try block used below for cleanup.
    cdef svm_parameter param
    cdef svm_problem problem
    cdef const char *error_msg
    cdef float64_t[::1] target
    cdef BlasFunctions blas_functions
    cdef int32_t[::1] class_weight_label

    if len(sample_weight) == 0:
        sample_weight = np.ones(X.shape[0], dtype=np.float64)
    else:
        assert sample_weight.shape[0] == X.shape[0], (
            f"sample_weight and X have incompatible shapes: sample_weight has "
            f"{sample_weight.shape[0]} samples while X has {X.shape[0]}"
        )

    if X.shape[0] < n_fold:
        raise ValueError("Number of samples is less than number of folds")

    # Everything that can raise without owning C memory is done before
    # set_problem allocates problem.x.
    kernel_index = LIBSVM_KERNEL_TYPES.index(kernel)
    class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32)
    blas_functions.dot = _dot[double]

    # set problem
    set_problem(
        &problem,
        <char*> &X[0, 0],
        <char*> &Y[0],
        <char*> &sample_weight[0] if sample_weight.size > 0 else NULL,
        <intp_t*> X.shape,
        kernel_index,
    )
    if problem.x == NULL:
        raise MemoryError("Seems we've run out of memory")

    # From here on problem.x must be released on every exit path, including
    # rejected parameters.
    try:
        # set parameters (same argument order as in fit: ..., C, tol,
        # epsilon, shrinking, ...)
        set_parameter(
            &param,
            svm_type,
            kernel_index,
            degree,
            gamma,
            coef0,
            nu,
            cache_size,
            C,
            tol,
            epsilon,
            shrinking,
            probability,
            <int> class_weight.shape[0],
            <char*> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
            <char*> &class_weight[0] if class_weight.size > 0 else NULL,
            max_iter,
            random_seed,
        )

        error_msg = svm_check_parameter(&problem, &param)
        if error_msg:
            # Decode the C string so the exception carries text, not bytes.
            # for SVR: epsilon is called p in libsvm
            error_repl = error_msg.decode('utf-8').replace("p < 0", "epsilon < 0")
            raise ValueError(error_repl)

        target = np.empty((X.shape[0]), dtype=np.float64)
        with nogil:
            svm_cross_validation(
                &problem,
                &param,
                n_fold,
                <double *> &target[0],
                &blas_functions,
            )
    finally:
        free(problem.x)

    return target.base
|
|
|
|
|
def set_verbosity_wrap(int verbosity):
    """Set the verbosity of the libsvm library.

    Thin Python-callable wrapper that forwards ``verbosity`` unchanged to
    the C-level ``set_verbosity`` routine.
    """
    set_verbosity(verbosity)
|
|