35 #ifndef OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
36 #define OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
59 std::vector<std::vector<std::pair<Int, DoubleReal> > >
sequences;
68 SVMData(std::vector<std::vector<std::pair<Int, DoubleReal> > > & seqs, std::vector<DoubleReal> & lbls) :
82 std::ofstream output_file(filename.c_str());
93 output_file <<
labels[i] <<
" ";
98 output_file << std::endl;
109 std::vector<String> parts;
110 std::vector<String> temp_parts;
125 TextFile text_file(filename.c_str(),
true);
126 TextFile::iterator it;
128 it = text_file.begin();
130 sequences.resize(text_file.size(), std::vector<std::pair<Int, DoubleReal> >());
131 labels.resize(text_file.size(), 0.);
132 while (counter < text_file.size() && it != text_file.end())
134 it->split(
' ', parts);
135 labels[counter] = parts[0].trim().toFloat();
136 sequences[counter].resize(parts.size(), std::pair<Int, DoubleReal>());
137 for (
Size j = 1; j < parts.size(); ++j)
139 parts[j].split(
':', temp_parts);
140 if (temp_parts.size() < 2)
144 sequences[counter][j - 1].second = temp_parts[0].trim().toFloat();
145 sequences[counter][j - 1].first = temp_parts[1].trim().toInt();
/// Integer overload: assigns `value` to the SVM parameter selected by `type`.
/// NOTE(review): declaration only — which SVM_parameter_type members are
/// integer-valued, and what happens for the others, is not visible here.
213 void setParameter(SVM_parameter_type type,
Int value);
/// Floating-point overload: assigns `value` to the SVM parameter selected by `type`.
/// NOTE(review): declaration only — which SVM_parameter_type members are
/// real-valued, and what happens for the others, is not visible here.
219 void setParameter(SVM_parameter_type type,
DoubleReal value);
/// Presumably trains the wrapped SVM on `problem` (a raw svm_problem pointer).
/// NOTE(review): the meaning of the returned Int and whether `problem` must
/// outlive the trained model are not visible from this declaration — confirm.
226 Int train(
struct svm_problem * problem);
/// Const member; presumably writes the current model to the file `modelFilename`.
/// The file name is taken by value as std::string.
/// NOTE(review): behavior when no model exists or the file cannot be written
/// is not visible here — confirm against the implementation.
243 void saveModel(std::string modelFilename)
const;
/// Presumably reads a model from the file `modelFilename` into this wrapper.
/// The file name is taken by value as std::string.
/// NOTE(review): error handling for missing or malformed files is not visible here.
251 void loadModel(std::string modelFilename);
/// Overload for a raw svm_problem: results are delivered through the
/// non-const reference `predicted_labels`.
/// NOTE(review): presumably one prediction per sample of `problem`, and
/// presumably requires a trained/loaded model — confirm.
259 void predict(
struct svm_problem * problem, std::vector<DoubleReal> & predicted_labels);
/// Overload for SVMData input (read-only): results are delivered through the
/// non-const reference `results`.
/// NOTE(review): presumably one prediction per sequence in `problem` — confirm.
267 void predict(
const SVMData & problem, std::vector<DoubleReal> & results);
/// Returns an Int for the SVM parameter selected by `type`.
/// NOTE(review): not declared const although the name suggests a pure getter;
/// the result for non-integer parameter types is not visible here.
282 Int getIntParameter(SVM_parameter_type type);
/// Returns a DoubleReal for the SVM parameter selected by `type`.
/// NOTE(review): not declared const although the name suggests a pure getter;
/// the result for non-real parameter types is not visible here.
293 DoubleReal getDoubleParameter(SVM_parameter_type type);
/// Static helper; presumably splits `problem` randomly into `number` parts and
/// returns them through `partitions`.
/// NOTE(review): `partitions` holds raw svm_problem pointers — who allocates
/// and frees them is not expressed by the signature; confirm.
301 static void createRandomPartitions(svm_problem * problem,
Size number, std::vector<svm_problem *> & partitions);
309 static void createRandomPartitions(
const SVMData & problem,
311 std::vector<SVMData> & problems);
/// Static helper returning a raw svm_problem pointer built from `problems`.
/// NOTE(review): `except` is presumably the index of the partition to leave
/// out of the merge; ownership of the returned pointer is not expressed by
/// the signature — confirm both against the implementation.
316 static svm_problem * mergePartitions(
const std::vector<svm_problem *> & problems,
Size except);
322 static void mergePartitions(
const std::vector<SVMData> & problems,
/// Overload for a list of raw svm_node pointers: results are delivered
/// through the non-const reference `predicted_rts`.
/// NOTE(review): presumably one prediction per entry of `vectors` — confirm.
332 void predict(
const std::vector<svm_node *> & vectors, std::vector<DoubleReal> & predicted_rts);
/// Static helper; presumably copies the labels stored in `problem` into `labels`.
/// NOTE(review): whether `labels` is cleared first is not visible here.
338 static void getLabels(svm_problem * problem, std::vector<DoubleReal> & labels);
344 DoubleReal performCrossValidation(svm_problem * problem_ul,
346 const bool is_labeled,
347 const std::map<SVM_parameter_type, DoubleReal> & start_values_map,
348 const std::map<SVM_parameter_type, DoubleReal> & step_sizes_map,
349 const std::map<SVM_parameter_type, DoubleReal> & end_values_map,
350 Size number_of_partitions,
352 std::map<SVM_parameter_type, DoubleReal> & best_parameters,
353 bool additive_step_sizes =
true,
355 String performances_file_name =
"performances.txt",
356 bool mcc_as_performance_measure =
false);
/// Static kernel function on two sequences given as vectors of (int, double)
/// pairs, using a precomputed `gauss_table`; returns the kernel value.
/// `max_distance` defaults to -1.
/// NOTE(review): presumably -1 means "no distance limit", and the meaning of
/// the pair members is not visible from this declaration — confirm.
385 static DoubleReal kernelOligo(
const std::vector<std::pair<int, double> > & x,
386 const std::vector<std::pair<int, double> > & y,
387 const std::vector<double> & gauss_table,
388 int max_distance = -1);
/// Static kernel function on two raw svm_node arrays, using a precomputed
/// `gauss_table`; returns the kernel value.
/// Defaults: `sigma_square` = 0, `max_distance` = 50.
/// NOTE(review): how `x` and `y` are terminated and how `sigma_square`
/// interacts with `gauss_table` is not visible here — confirm.
397 static DoubleReal kernelOligo(
const svm_node * x,
const svm_node * y,
const std::vector<DoubleReal> & gauss_table,
DoubleReal sigma_square = 0,
Size max_distance = 50);
/// Computes a pair of border values for `data` and returns it through `borders`.
/// Defaults: confidence 0.95, 5 runs, 5 partitions, step size 0.01,
/// at most 1000000 iterations.
/// NOTE(review): presumably the borders are estimated via repeated
/// cross-validation — confirm; the exact meaning of the two pair members is
/// not visible from this declaration.
403 void getSignificanceBorders(svm_problem * data, std::pair<DoubleReal, DoubleReal> & borders,
DoubleReal confidence = 0.95,
Size number_of_runs = 5,
Size number_of_partitions = 5,
DoubleReal step_size = 0.01,
Size max_iterations = 1000000);
409 void getSignificanceBorders(
const SVMData & data,
410 std::pair<DoubleReal, DoubleReal> & sigmas,
412 Size number_of_runs = 5,
413 Size number_of_partitions = 5,
415 Size max_iterations = 1000000);
/// Presumably evaluates the model on `data` and writes the decision values
/// into `decision_values`.
/// NOTE(review): which SVM types this is valid for is not visible here — confirm.
436 void getDecisionValues(svm_problem * data, std::vector<DoubleReal> & decision_values);
/// Presumably rescales the feature values of `data` in place.
/// `max_scale_value` defaults to -1.
/// NOTE(review): the target range and the meaning of -1 are not visible
/// from this declaration — confirm.
444 void scaleData(svm_problem * data,
Int max_scale_value = -1);
/// Static helper that fills `gauss_table` from `border_length` and `sigma`.
/// NOTE(review): presumably precomputes Gaussian values for kernelOligo;
/// the table size and exact formula are not visible here — confirm.
446 static void calculateGaussTable(
Size border_length,
DoubleReal sigma, std::vector<DoubleReal> & gauss_table);
/// Returns a raw svm_problem pointer computed from `problem1` and `problem2`.
/// NOTE(review): presumably a precomputed kernel matrix between the two
/// problems; ownership of the returned pointer is not expressed by the
/// signature — confirm.
456 svm_problem * computeKernelMatrix(svm_problem * problem1, svm_problem * problem2);
/// SVMData overload: returns a raw svm_problem pointer computed from the two
/// read-only inputs.
/// NOTE(review): presumably a precomputed kernel matrix between the two data
/// sets; ownership of the returned pointer is not expressed by the
/// signature — confirm.
466 svm_problem * computeKernelMatrix(
const SVMData & problem1,
const SVMData & problem2);
/// Registers `training_sample` (raw svm_problem pointer) with this wrapper.
/// NOTE(review): whether the pointer is stored or copied, and the required
/// lifetime of the pointee, are not visible here — confirm.
472 void setTrainingSample(svm_problem * training_sample);
/// SVMData overload of setTrainingSample.
/// NOTE(review): the argument is a non-const reference — whether it is
/// modified, copied or referenced afterwards is not visible here; confirm.
478 void setTrainingSample(
SVMData & training_sample);
/// Fills `probabilities` and `prediction_labels` for the samples in `problem`.
/// NOTE(review): presumably only meaningful for classification (SVC) models
/// trained with probability estimates — confirm against the implementation.
489 void getSVCProbabilities(
struct svm_problem * problem, std::vector<DoubleReal> & probabilities, std::vector<DoubleReal> & prediction_labels);
/// Sets per-label weights from two parallel, read-only vectors.
/// NOTE(review): presumably `weights[i]` applies to `weight_labels[i]`;
/// handling of differing vector sizes is not visible here — confirm.
495 void setWeights(
const std::vector<Int> & weight_labels,
const std::vector<DoubleReal> & weights);
/// Updates `actual_values` (non-const reference) from the start, step and end
/// vectors and returns a bool.
/// NOTE(review): presumably advances to the next grid point, stepping
/// additively or multiplicatively per `additive_step_sizes`, and returns
/// false once the grid is exhausted — confirm against the implementation.
505 bool nextGrid_(
const std::vector<DoubleReal> & start_values,
506 const std::vector<DoubleReal> & step_sizes,
507 const std::vector<DoubleReal> & end_values,
508 const bool additive_step_sizes,
509 std::vector<DoubleReal> & actual_values);
/// Returns a count (Size) computed from `m1`, `m2` and the read-only `points`.
/// NOTE(review): presumably the number of points lying between two borders
/// defined by `m1` and `m2`; the exact geometric test is not visible here.
511 Size getNumberOfEnclosedPoints_(
DoubleReal m1,
DoubleReal m2,
const std::vector<std::pair<DoubleReal, DoubleReal> > & points);
/// Presumably sets the wrapper's SVM parameters to their initial values.
/// NOTE(review): the defaults themselves are not visible from this declaration.
517 void initParameters_();
/// Static function taking an unnamed C string and returning nothing.
/// NOTE(review): presumably a no-op print callback used to silence library
/// output — confirm where it is registered.
525 static void printToVoid_(
const char * );
543 #endif // OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
the C parameter of the svm
Definition: SVMWrapper.h:179
static bool writable(const String &file)
Return true if the file is writable.
bool operator==(const SVMData &rhs) const
Definition: SVMWrapper.h:74
A more convenient string class.
Definition: String.h:56
SVMData training_data_
Definition: SVMWrapper.h:537
svm_model * model_
Definition: SVMWrapper.h:528
svm_problem * training_problem_
Definition: SVMWrapper.h:536
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:163
svm_problem * training_set_
Definition: SVMWrapper.h:535
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:181
SVM_kernel_type
Kernel type.
Definition: SVMWrapper.h:189
std::vector< DoubleReal > gauss_table_
Definition: SVMWrapper.h:531
std::vector< DoubleReal > sigmas_
Definition: SVMWrapper.h:530
Size border_length_
Definition: SVMWrapper.h:534
Size kernel_type_
Definition: SVMWrapper.h:533
svm_parameter * param_
Definition: SVMWrapper.h:527
static bool exists(const String &file)
Method used to test if a file exists.
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:57
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:176
SVMData()
Definition: SVMWrapper.h:62
Definition: SVMWrapper.h:184
the gamma parameter of the POLY, RBF and SIGMOID kernel
Definition: SVMWrapper.h:182
Definition: SVMWrapper.h:183
std::vector< std::vector< DoubleReal > > gauss_tables_
Definition: SVMWrapper.h:532
DoubleReal sigma_
Definition: SVMWrapper.h:529
SVMData(std::vector< std::vector< std::pair< Int, DoubleReal > > > &seqs, std::vector< DoubleReal > &lbls)
Definition: SVMWrapper.h:68
static bool empty(const String &file)
Return true if the file does not exist or the file is empty.
the degree for the polynomial kernel
Definition: SVMWrapper.h:178
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:144
std::vector< DoubleReal > labels
Definition: SVMWrapper.h:60
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:56
std::vector< std::vector< std::pair< Int, DoubleReal > > > sequences
Definition: SVMWrapper.h:59
bool store(const String &filename) const
Definition: SVMWrapper.h:80
int Int
Signed integer type.
Definition: Types.h:100
static bool readable(const String &file)
Return true if the file exists and is readable.
the nu parameter for nu-SVR
Definition: SVMWrapper.h:180
This class provides some basic file handling methods for text files.
Definition: TextFile.h:47
the kernel type
Definition: SVMWrapper.h:177
bool load(const String &filename)
Definition: SVMWrapper.h:106
SVM_parameter_type
Parameters for the svm to be set from outside.
Definition: SVMWrapper.h:174