Serves as a wrapper for the libsvm. More...
#include <OpenMS/ANALYSIS/SVM/SVMWrapper.h>
Public Types | |
enum | SVM_parameter_type { SVM_TYPE, KERNEL_TYPE, DEGREE, C, NU, P, GAMMA, PROBABILITY, SIGMA, BORDER_LENGTH } |
Parameters for the svm to be set from outside. More... | |
enum | SVM_kernel_type { OLIGO = 19, OLIGO_COMBINED } |
Kernel type. More... | |
![]() | |
enum | LogType { CMD, GUI, NONE } |
Possible log types. More... | |
Public Member Functions | |
SVMWrapper () | |
standard constructor More... | |
virtual | ~SVMWrapper () |
destructor More... | |
void | setParameter (SVM_parameter_type type, Int value) |
You can set the parameters of the svm: More... | |
void | setParameter (SVM_parameter_type type, DoubleReal value) |
sets the double parameters of the svm More... | |
Int | train (struct svm_problem *problem) |
trains the svm More... | |
Int | train (SVMData &problem) |
trains the svm More... | |
void | saveModel (std::string modelFilename) const |
saves the svm model More... | |
void | loadModel (std::string modelFilename) |
loads the model More... | |
void | predict (struct svm_problem *problem, std::vector< DoubleReal > &predicted_labels) |
predicts the labels using the trained model More... | |
void | predict (const SVMData &problem, std::vector< DoubleReal > &results) |
predicts the labels using the trained model More... | |
Int | getIntParameter (SVM_parameter_type type) |
You can get the actual int- parameters of the svm. More... | |
DoubleReal | getDoubleParameter (SVM_parameter_type type) |
You can get the actual double- parameters of the svm. More... | |
void | predict (const std::vector< svm_node * > &vectors, std::vector< DoubleReal > &predicted_rts) |
predicts the labels using the trained model More... | |
DoubleReal | performCrossValidation (svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, DoubleReal > &start_values_map, const std::map< SVM_parameter_type, DoubleReal > &step_sizes_map, const std::map< SVM_parameter_type, DoubleReal > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, DoubleReal > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false) |
Performs a CV for the data given by 'problem'. More... | |
DoubleReal | getSVRProbability () |
Returns the probability parameter sigma of the fitted laplace model. More... | |
void | getSignificanceBorders (svm_problem *data, std::pair< DoubleReal, DoubleReal > &borders, DoubleReal confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, DoubleReal step_size=0.01, Size max_iterations=1000000) |
calculates the significance borders of the error model and stores them in 'borders' More... | |
void | getSignificanceBorders (const SVMData &data, std::pair< DoubleReal, DoubleReal > &sigmas, DoubleReal confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, DoubleReal step_size=0.01, Size max_iterations=1000000) |
calculates the significance borders of the error model and stores them in 'sigmas' More... | |
DoubleReal | getPValue (DoubleReal sigma1, DoubleReal sigma2, std::pair< DoubleReal, DoubleReal > point) |
calculates a p-value for a given data point using the model parameters More... | |
void | getDecisionValues (svm_problem *data, std::vector< DoubleReal > &decision_values) |
stores the prediction values for the encoded data in 'decision_values' More... | |
void | scaleData (svm_problem *data, Int max_scale_value=-1) |
Scales the data such that every column is scaled to [-1, 1]. More... | |
svm_problem * | computeKernelMatrix (svm_problem *problem1, svm_problem *problem2) |
computes the kernel matrix using the actual svm parameters and the given data More... | |
svm_problem * | computeKernelMatrix (const SVMData &problem1, const SVMData &problem2) |
computes the kernel matrix using the actual svm parameters and the given data More... | |
void | setTrainingSample (svm_problem *training_sample) |
This is used for being able to perform predictions with non libsvm standard kernels. More... | |
void | setTrainingSample (SVMData &training_sample) |
This is used for being able to perform predictions with non libsvm standard kernels. More... | |
void | getSVCProbabilities (struct svm_problem *problem, std::vector< DoubleReal > &probabilities, std::vector< DoubleReal > &prediction_labels) |
This function fills probabilities with the probability estimates for the first class. More... | |
void | setWeights (const std::vector< Int > &weight_labels, const std::vector< DoubleReal > &weights) |
Sets weights for the classes in C_SVC (see libsvm documentation for further details) More... | |
![]() | |
ProgressLogger () | |
Constructor. More... | |
~ProgressLogger () | |
Destructor. More... | |
void | setLogType (LogType type) const |
Sets the progress log that should be used. The default type is NONE! More... | |
LogType | getLogType () const |
Returns the type of progress log being used. More... | |
void | startProgress (SignedSize begin, SignedSize end, const String &label) const |
Initializes the progress display. More... | |
void | setProgress (SignedSize value) const |
Sets the current progress. More... | |
void | endProgress () const |
Ends the progress display. More... | |
Static Public Member Functions | |
static void | createRandomPartitions (svm_problem *problem, Size number, std::vector< svm_problem * > &partitions) |
You can create 'number' equally sized random partitions. More... | |
static void | createRandomPartitions (const SVMData &problem, Size number, std::vector< SVMData > &problems) |
You can create 'number' equally sized random partitions. More... | |
static svm_problem * | mergePartitions (const std::vector< svm_problem * > &problems, Size except) |
You can merge partitions excluding the partition with index 'except'. More... | |
static void | mergePartitions (const std::vector< SVMData > &problems, Size except, SVMData &merged_problem) |
You can merge partitions excluding the partition with index 'except'. More... | |
static void | getLabels (svm_problem *problem, std::vector< DoubleReal > &labels) |
Stores the stored labels of the encoded SVM data at 'labels'. More... | |
static DoubleReal | kernelOligo (const std::vector< std::pair< int, double > > &x, const std::vector< std::pair< int, double > > &y, const std::vector< double > &gauss_table, int max_distance=-1) |
returns the value of the oligo kernel for sequences 'x' and 'y' More... | |
static DoubleReal | kernelOligo (const svm_node *x, const svm_node *y, const std::vector< DoubleReal > &gauss_table, DoubleReal sigma_square=0, Size max_distance=50) |
calculates the oligo kernel value for the encoded sequences 'x' and 'y' More... | |
static void | calculateGaussTable (Size border_length, DoubleReal sigma, std::vector< DoubleReal > &gauss_table) |
Private Member Functions | |
bool | nextGrid_ (const std::vector< DoubleReal > &start_values, const std::vector< DoubleReal > &step_sizes, const std::vector< DoubleReal > &end_values, const bool additive_step_sizes, std::vector< DoubleReal > &actual_values) |
find next grid search parameter combination More... | |
Size | getNumberOfEnclosedPoints_ (DoubleReal m1, DoubleReal m2, const std::vector< std::pair< DoubleReal, DoubleReal > > &points) |
void | initParameters_ () |
Initializes the svm with standard parameters. More... | |
Static Private Member Functions | |
static void | printToVoid_ (const char *) |
This function is passed to lib svm for output control. More... | |
Private Attributes | |
svm_parameter * | param_ |
svm_model * | model_ |
DoubleReal | sigma_ |
std::vector< DoubleReal > | sigmas_ |
std::vector< DoubleReal > | gauss_table_ |
std::vector< std::vector < DoubleReal > > | gauss_tables_ |
Size | kernel_type_ |
Size | border_length_ |
svm_problem * | training_set_ |
svm_problem * | training_problem_ |
SVMData | training_data_ |
Additional Inherited Members | |
![]() | |
LogType | type_ |
SignedSize | begin_ |
SignedSize | end_ |
SignedSize | value_ |
QProgressDialog * | dlg_ |
StopWatch | stop_watch_ |
time_t | last_invoke_ |
![]() | |
static int | recursion_depth_ |
Serves as a wrapper for the libsvm.
This class can be used for svm predictions. You can either perform classification or regression and choose certain kernel functions and additional parameters. Furthermore, the models can be saved and loaded, and we also support a new kernel function that was specially designed for learning with small sequences of different lengths.
enum SVM_kernel_type |
enum SVM_parameter_type |
Parameters for the svm to be set from outside.
This type is used to specify the kind of parameter that is to be set or retrieved by the set/getParameter methods.
SVMWrapper | ( | ) |
standard constructor
|
virtual |
destructor
|
static |
svm_problem* computeKernelMatrix | ( | svm_problem * | problem1, |
svm_problem * | problem2 | ||
) |
computes the kernel matrix using the actual svm parameters and the given data
This function can be used to compute a kernel matrix. 'problem1' and 'problem2' are used together with the oligo kernel function (could be extended if you want to use your own kernel functions).
computes the kernel matrix using the actual svm parameters and the given data
This function can be used to compute a kernel matrix. 'problem1' and 'problem2' are used together with the oligo kernel function (could be extended if you want to use your own kernel functions).
|
static |
You can create 'number' equally sized random partitions.
This function creates 'number' equally sized random partitions and stores them in 'partitions'.
|
static |
You can create 'number' equally sized random partitions.
This function creates 'number' equally sized random partitions and stores them in 'partitions'.
void getDecisionValues | ( | svm_problem * | data, |
std::vector< DoubleReal > & | decision_values | ||
) |
stores the prediction values for the encoded data in 'decision_values'
This function can be used to get the prediction values of the data if a model is already trained by the train() method. For regression the result is the same as for the method predict. For classification this function returns the distance from the separating hyperplane. For multiclass classification the decision_values vector will be empty.
DoubleReal getDoubleParameter | ( | SVM_parameter_type | type | ) |
You can get the actual double- parameters of the svm.
C: the C parameter of the svm; P: the P parameter of the svm (sets the epsilon in epsilon-svr); NU: the nu parameter in nu-SVR; GAMMA: for POLY, RBF and SIGMOID
Int getIntParameter | ( | SVM_parameter_type | type | ) |
You can get the actual int- parameters of the svm.
KERNEL_TYPE: can be LINEAR for the linear kernel, RBF for the rbf kernel, POLY for the polynomial kernel, or SIGMOID for the sigmoid kernel
DEGREE: the degree for the polynomial kernel and the locality-improved kernel
SVM_TYPE: the SVM type of the svm: can be NU_SVR or EPSILON_SVR
|
static |
Stores the stored labels of the encoded SVM data at 'labels'.
|
private |
DoubleReal getPValue | ( | DoubleReal | sigma1, |
DoubleReal | sigma2, | ||
std::pair< DoubleReal, DoubleReal > | point | ||
) |
calculates a p-value for a given data point using the model parameters
Uses the model parameters to calculate the p-value for 'point' which has the data entries: measured, predicted retention time.
void getSignificanceBorders | ( | svm_problem * | data, |
std::pair< DoubleReal, DoubleReal > & | borders, | ||
DoubleReal | confidence = 0.95 , |
||
Size | number_of_runs = 5 , |
||
Size | number_of_partitions = 5 , |
||
DoubleReal | step_size = 0.01 , |
||
Size | max_iterations = 1000000 |
||
) |
calculates the significance borders of the error model and stores them in 'borders'
void getSignificanceBorders | ( | const SVMData & | data, |
std::pair< DoubleReal, DoubleReal > & | sigmas, | ||
DoubleReal | confidence = 0.95 , |
||
Size | number_of_runs = 5 , |
||
Size | number_of_partitions = 5 , |
||
DoubleReal | step_size = 0.01 , |
||
Size | max_iterations = 1000000 |
||
) |
calculates the significance borders of the error model and stores them in 'sigmas'
void getSVCProbabilities | ( | struct svm_problem * | problem, |
std::vector< DoubleReal > & | probabilities, | ||
std::vector< DoubleReal > & | prediction_labels | ||
) |
This function fills probabilities with the probability estimates for the first class.
The libSVM function svm_predict_probability is called to get probability estimates for the positive class. Since this is only used for binary classification it is sufficient for every test example to report the probability of the test example belonging to the positive class. Probability estimates have to be turned on during training (svm.setParameter(PROBABILITY, 1)), otherwise this method will fill the 'probabilities' vector with -1s.
DoubleReal getSVRProbability | ( | ) |
Returns the probability parameter sigma of the fitted laplace model.
The libsvm is used to fit a laplace model to the prediction values by performing an internal cv using the training set if setParameter(PROBABILITY, 1) was invoked before using train. See the libsvm documentation for more details. The model parameter sigma is returned by this method. If no model was fitted during training, zero is returned.
|
private |
Initializes the svm with standard parameters.
|
static |
returns the value of the oligo kernel for sequences 'x' and 'y'
This function computes the kernel value of the oligo kernel, which was introduced by Meinicke et al. in 2004. 'x' and 'y' are encoded by encodeOligo and 'gauss_table' has to be constructed by calculateGaussTable.
'max_distance' can be used to speed up the computation even further by restricting the maximum distance between a k_mer at position i in sequence 'x' and a k_mer at position j in sequence 'y'. If i - j > 'max_distance' the value is not added to the kernel value. This approximation is switched off by default (max_distance < 0).
|
static |
calculates the oligo kernel value for the encoded sequences 'x' and 'y'
This kernel function calculates the oligo kernel value [Meinicke 04] for the sequences 'x' and 'y' that had been encoded by the encodeOligoBorder... function of the LibSVMEncoder class.
void loadModel | ( | std::string | modelFilename | ) |
loads the model
The svm- model is loaded. After this, the svm is ready for prediction.
|
static |
You can merge partitions excluding the partition with index 'except'.
|
static |
You can merge partitions excluding the partition with index 'except'.
|
private |
find next grid search parameter combination
The current grid cell is given in 'actual_values'. The result is returned in 'actual_values'.
DoubleReal performCrossValidation | ( | svm_problem * | problem_ul, |
const SVMData & | problem_l, | ||
const bool | is_labeled, | ||
const std::map< SVM_parameter_type, DoubleReal > & | start_values_map, | ||
const std::map< SVM_parameter_type, DoubleReal > & | step_sizes_map, | ||
const std::map< SVM_parameter_type, DoubleReal > & | end_values_map, | ||
Size | number_of_partitions, | ||
Size | number_of_runs, | ||
std::map< SVM_parameter_type, DoubleReal > & | best_parameters, | ||
bool | additive_step_sizes = true , |
||
bool | output = false , |
||
String | performances_file_name = "performances.txt" , |
||
bool | mcc_as_performance_measure = false |
||
) |
Performs a CV for the data given by 'problem'.
void predict | ( | struct svm_problem * | problem, |
std::vector< DoubleReal > & | predicted_labels | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'predicted_labels'.
void predict | ( | const SVMData & | problem, |
std::vector< DoubleReal > & | results | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'results'.
void predict | ( | const std::vector< svm_node * > & | vectors, |
std::vector< DoubleReal > & | predicted_rts | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'predicted_rts'.
|
staticprivate |
This function is passed to lib svm for output control.
The intention is to discard the output, as we don't need it.
void saveModel | ( | std::string | modelFilename | ) | const |
saves the svm model
The model of the trained svm is saved into 'modelFilename'. Throws an exception if the model cannot be saved.
Exception::UnableToCreateFile |
void scaleData | ( | svm_problem * | data, |
Int | max_scale_value = -1 |
||
) |
Scales the data such that every column is scaled to [-1, 1].
Scales the x[][].value values of the svm_problem* structure. If the second parameter is omitted, the data is scaled to [-1, 1]. Otherwise the data is scaled to [0, max_scale_value]
void setParameter | ( | SVM_parameter_type | type, |
Int | value | ||
) |
You can set the parameters of the svm:
KERNEL_TYPE: can be LINEAR for the linear kernel, RBF for the rbf kernel, POLY for the polynomial kernel, or SIGMOID for the sigmoid kernel; DEGREE: the degree for the polynomial kernel and the locality-improved kernel
C: the C parameter of the svm
void setParameter | ( | SVM_parameter_type | type, |
DoubleReal | value | ||
) |
sets the double parameters of the svm
void setTrainingSample | ( | svm_problem * | training_sample | ) |
This is used for being able to perform predictions with non libsvm standard kernels.
void setTrainingSample | ( | SVMData & | training_sample | ) |
This is used for being able to perform predictions with non libsvm standard kernels.
void setWeights | ( | const std::vector< Int > & | weight_labels, |
const std::vector< DoubleReal > & | weights | ||
) |
Sets weights for the classes in C_SVC (see libsvm documentation for further details)
Int train | ( | struct svm_problem * | problem | ) |
trains the svm
The svm is trained with the data stored in the 'svm_problem' structure.
trains the svm
The svm is trained with the data stored in the 'SVMData' structure.
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
OpenMS / TOPP release 1.11.1 | Documentation generated on Thu Nov 14 2013 11:19:27 using doxygen 1.8.5 |