Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SVMWrapper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Sandro Andreotti $
32 // $Authors: Nico Pfeifer, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
36 #define OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
37 
38 #include <svm.h>
39 
40 #include <OpenMS/CONCEPT/Types.h>
43 #include <OpenMS/FORMAT/TextFile.h>
44 #include <OpenMS/SYSTEM/File.h>
45 
46 #include <string>
47 #include <vector>
48 #include <map>
49 #include <cmath>
50 #include <iostream>
51 #include <fstream>
52 
53 namespace OpenMS
54 {
55 
57  struct SVMData
58  {
59  std::vector<std::vector<std::pair<Int, DoubleReal> > > sequences;
60  std::vector<DoubleReal> labels;
61 
62  SVMData() :
63  sequences(std::vector<std::vector<std::pair<Int, DoubleReal> > >()),
64  labels(std::vector<DoubleReal>())
65  {
66  }
67 
68  SVMData(std::vector<std::vector<std::pair<Int, DoubleReal> > > & seqs, std::vector<DoubleReal> & lbls) :
69  sequences(seqs),
70  labels(lbls)
71  {
72  }
73 
74  bool operator==(const SVMData & rhs) const
75  {
76  return sequences == rhs.sequences
77  && labels == rhs.labels;
78  }
79 
80  bool store(const String & filename) const
81  {
82  std::ofstream output_file(filename.c_str());
83 
84  // checking if file is writable
85  if (!File::writable(filename) || sequences.size() != labels.size())
86  {
87  return false;
88  }
89 
90  // writing feature vectors
91  for (Size i = 0; i < sequences.size(); i++)
92  {
93  output_file << labels[i] << " ";
94  for (Size j = 0; j < sequences[i].size(); ++j)
95  {
96  output_file << sequences[i][j].second << ":" << sequences[i][j].first << " ";
97  }
98  output_file << std::endl;
99  }
100  output_file.flush();
101  output_file.close();
102  std::cout.flush();
103  return true;
104  }
105 
106  bool load(const String & filename)
107  {
108  Size counter = 0;
109  std::vector<String> parts;
110  std::vector<String> temp_parts;
111 
112  if (!File::exists(filename))
113  {
114  return false;
115  }
116  if (!File::readable(filename))
117  {
118  return false;
119  }
120  if (File::empty(filename))
121  {
122  return false;
123  }
124 
125  TextFile text_file(filename.c_str(), true);
126  TextFile::iterator it;
127 
128  it = text_file.begin();
129 
130  sequences.resize(text_file.size(), std::vector<std::pair<Int, DoubleReal> >());
131  labels.resize(text_file.size(), 0.);
132  while (counter < text_file.size() && it != text_file.end())
133  {
134  it->split(' ', parts);
135  labels[counter] = parts[0].trim().toFloat();
136  sequences[counter].resize(parts.size(), std::pair<Int, DoubleReal>());
137  for (Size j = 1; j < parts.size(); ++j)
138  {
139  parts[j].split(':', temp_parts);
140  if (temp_parts.size() < 2)
141  {
142  return false;
143  }
144  sequences[counter][j - 1].second = temp_parts[0].trim().toFloat();
145  sequences[counter][j - 1].first = temp_parts[1].trim().toInt();
146  }
147  ++counter;
148  ++it;
149  }
150  return true;
151  }
152 
153  };
154 
// Serves as a wrapper for the libsvm.
163  class OPENMS_DLLAPI SVMWrapper :
164  public ProgressLogger
165  {
166 public:
167 
 // Parameters for the svm to be set from outside.
 // NOTE(review): the line that opens this enum and some of its enumerators are
 // not present in this view (the embedded numbering jumps from 167 to 175, from
 // 175 to 179 and from 181 to 185); restore them from the original header.
175  {
179  C, // the C parameter of the svm
180  NU, // the nu parameter for nu-SVR
181  P, // the epsilon parameter for epsilon-SVR
185  BORDER_LENGTH
186  };
187 
 // Kernel type.
 // NOTE(review): the line that opens this enum is likewise missing from this view.
190  {
191  OLIGO = 19, // explicit value 19, so OLIGO_COMBINED evaluates to 20
192  OLIGO_COMBINED
193  };
194 
196  SVMWrapper();
197 
199  virtual ~SVMWrapper();
200 
 // Sets an integer-valued parameter selected by 'type'.
213  void setParameter(SVM_parameter_type type, Int value);
214 
 // Sets a real-valued parameter selected by 'type'.
219  void setParameter(SVM_parameter_type type, DoubleReal value);
220 
 // Training on a libsvm svm_problem.
 // NOTE(review): the meaning of the returned Int is not visible here — confirm in the implementation.
226  Int train(struct svm_problem * problem);
227 
 // Training on an SVMData set (same return-value caveat as the overload above).
233  Int train(SVMData & problem);
234 
 // NOTE(review): the file name is taken by value (std::string), unlike the
 // 'const String &' used by SVMData::store()/load().
243  void saveModel(std::string modelFilename) const;
244 
251  void loadModel(std::string modelFilename);
252 
 // Prediction for a libsvm svm_problem; results are returned through 'predicted_labels'.
259  void predict(struct svm_problem * problem, std::vector<DoubleReal> & predicted_labels);
260 
 // Prediction for an SVMData set; results are returned through 'results'.
267  void predict(const SVMData & problem, std::vector<DoubleReal> & results);
268 
 // Getter counterpart of setParameter(SVM_parameter_type, Int).
282  Int getIntParameter(SVM_parameter_type type);
283 
 // Getter counterpart of setParameter(SVM_parameter_type, DoubleReal).
293  DoubleReal getDoubleParameter(SVM_parameter_type type);
294 
 // Fills 'partitions' from 'problem'; 'number' is presumably the number of partitions — confirm.
 // NOTE(review): ownership of the svm_problem pointers placed in 'partitions' is not visible here.
301  static void createRandomPartitions(svm_problem * problem, Size number, std::vector<svm_problem *> & partitions);
302 
 // SVMData variant of the function above; output goes to 'problems'.
309  static void createRandomPartitions(const SVMData & problem,
310  Size number,
311  std::vector<SVMData> & problems);
 // Merges 'problems' into one svm_problem; 'except' presumably is the index of the partition to leave out — confirm.
 // NOTE(review): ownership of the returned pointer is not visible here.
316  static svm_problem * mergePartitions(const std::vector<svm_problem *> & problems, Size except);
317 
 // SVMData variant of the function above; the result is written to 'merged_problem'.
322  static void mergePartitions(const std::vector<SVMData> & problems,
323  Size except,
324  SVMData & merged_problem);
325 
 // Prediction for a list of svm_node vectors; results are returned through 'predicted_rts'.
332  void predict(const std::vector<svm_node *> & vectors, std::vector<DoubleReal> & predicted_rts);
333 
 // Writes the labels of 'problem' to 'labels'.
338  static void getLabels(svm_problem * problem, std::vector<DoubleReal> & labels);
339 
 // Cross validation over a parameter grid described by the start/step/end maps;
 // the best parameters found are returned through 'best_parameters'.
 // NOTE(review): presumably 'is_labeled' selects between 'problem_l' (SVMData) and
 // 'problem_ul' (svm_problem) — confirm in the implementation.
344  DoubleReal performCrossValidation(svm_problem * problem_ul,
345  const SVMData & problem_l,
346  const bool is_labeled,
347  const std::map<SVM_parameter_type, DoubleReal> & start_values_map,
348  const std::map<SVM_parameter_type, DoubleReal> & step_sizes_map,
349  const std::map<SVM_parameter_type, DoubleReal> & end_values_map,
350  Size number_of_partitions,
351  Size number_of_runs,
352  std::map<SVM_parameter_type, DoubleReal> & best_parameters,
353  bool additive_step_sizes = true,
354  bool output = false,
355  String performances_file_name = "performances.txt",
356  bool mcc_as_performance_measure = false);
357 
358 
368  DoubleReal getSVRProbability();
369 
 // Oligo kernel on two feature vectors given as (int, double) pairs.
 // Note that the default of 'max_distance' is -1 here but 50 in the svm_node overload below.
385  static DoubleReal kernelOligo(const std::vector<std::pair<int, double> > & x,
386  const std::vector<std::pair<int, double> > & y,
387  const std::vector<double> & gauss_table,
388  int max_distance = -1);
389 
 // Oligo kernel on two libsvm svm_node arrays.
397  static DoubleReal kernelOligo(const svm_node * x, const svm_node * y, const std::vector<DoubleReal> & gauss_table, DoubleReal sigma_square = 0, Size max_distance = 50);
398 
 // Result is returned through 'borders'. The SVMData overload below has the same
 // defaults but names its output parameter 'sigmas'.
403  void getSignificanceBorders(svm_problem * data, std::pair<DoubleReal, DoubleReal> & borders, DoubleReal confidence = 0.95, Size number_of_runs = 5, Size number_of_partitions = 5, DoubleReal step_size = 0.01, Size max_iterations = 1000000);
404 
409  void getSignificanceBorders(const SVMData & data,
410  std::pair<DoubleReal, DoubleReal> & sigmas,
411  DoubleReal confidence = 0.95,
412  Size number_of_runs = 5,
413  Size number_of_partitions = 5,
414  DoubleReal step_size = 0.01,
415  Size max_iterations = 1000000);
416 
424  DoubleReal getPValue(DoubleReal sigma1, DoubleReal sigma2, std::pair<DoubleReal, DoubleReal> point);
425 
 // Writes the decision values for 'data' to 'decision_values'.
436  void getDecisionValues(svm_problem * data, std::vector<DoubleReal> & decision_values);
437 
 // NOTE(review): the meaning of the default max_scale_value = -1 is not visible here — confirm.
444  void scaleData(svm_problem * data, Int max_scale_value = -1);
445 
 // Fills 'gauss_table' for the given border length and sigma.
446  static void calculateGaussTable(Size border_length, DoubleReal sigma, std::vector<DoubleReal> & gauss_table);
447 
 // NOTE(review): ownership of the returned svm_problem is not visible here (applies to both overloads).
456  svm_problem * computeKernelMatrix(svm_problem * problem1, svm_problem * problem2);
457 
466  svm_problem * computeKernelMatrix(const SVMData & problem1, const SVMData & problem2);
467 
 // NOTE(review): presumably stores the pointer in training_set_ — confirm whether ownership is taken.
472  void setTrainingSample(svm_problem * training_sample);
473 
 // NOTE(review): presumably stores the data in training_data_ — confirm.
478  void setTrainingSample(SVMData & training_sample);
479 
 // Results are returned through 'probabilities' and 'prediction_labels'.
489  void getSVCProbabilities(struct svm_problem * problem, std::vector<DoubleReal> & probabilities, std::vector<DoubleReal> & prediction_labels);
490 
 // NOTE(review): presumably weight_labels[i] is paired with weights[i] — confirm.
495  void setWeights(const std::vector<Int> & weight_labels, const std::vector<DoubleReal> & weights);
496 
497 private:
 // Grid-search helper working on 'actual_values' within the grid given by start/step/end values.
 // NOTE(review): presumably returns false once the grid is exhausted — confirm.
505  bool nextGrid_(const std::vector<DoubleReal> & start_values,
506  const std::vector<DoubleReal> & step_sizes,
507  const std::vector<DoubleReal> & end_values,
508  const bool additive_step_sizes,
509  std::vector<DoubleReal> & actual_values);
510 
511  Size getNumberOfEnclosedPoints_(DoubleReal m1, DoubleReal m2, const std::vector<std::pair<DoubleReal, DoubleReal> > & points);
512 
517  void initParameters_();
518 
 // The parameter is deliberately unnamed (commented out as /*s*/).
525  static void printToVoid_(const char * /*s*/);
526 
527  svm_parameter * param_; // the parameters for the svm
528  svm_model * model_; // the learnt svm discriminant
529  DoubleReal sigma_; // for the oligo kernel (amount of positional smearing)
530  std::vector<DoubleReal> sigmas_; // for the combined oligo kernel (amount of positional smearing)
531  std::vector<DoubleReal> gauss_table_; // lookup table for fast computation of the oligo kernel
532  std::vector<std::vector<DoubleReal> > gauss_tables_; // lookup table for fast computation of the combined oligo kernel
533  Size kernel_type_; // the actual kernel type
534  Size border_length_; // the border length (NOTE(review): comment used to duplicate kernel_type_'s; presumably the BORDER_LENGTH parameter — confirm)
535  svm_problem * training_set_; // the training set
536  svm_problem * training_problem_; // NOTE(review): was also commented "the training set"; how it differs from training_set_ is not visible here — confirm
537  SVMData training_data_; // the training set (different encoding)
538 
539  };
540 
541 } // namespace OpenMS
542 
543 #endif // OPENMS_ANALYSIS_SVM_SVMWRAPPER_H
the C parameter of the svm
Definition: SVMWrapper.h:179
static bool writable(const String &file)
Return true if the file is writable.
bool operator==(const SVMData &rhs) const
Definition: SVMWrapper.h:74
A more convenient string class.
Definition: String.h:56
SVMData training_data_
Definition: SVMWrapper.h:537
svm_model * model_
Definition: SVMWrapper.h:528
svm_problem * training_problem_
Definition: SVMWrapper.h:536
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:163
svm_problem * training_set_
Definition: SVMWrapper.h:535
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:181
SVM_kernel_type
Kernel type.
Definition: SVMWrapper.h:189
std::vector< DoubleReal > gauss_table_
Definition: SVMWrapper.h:531
std::vector< DoubleReal > sigmas_
Definition: SVMWrapper.h:530
Size border_length_
Definition: SVMWrapper.h:534
Size kernel_type_
Definition: SVMWrapper.h:533
svm_parameter * param_
Definition: SVMWrapper.h:527
static bool exists(const String &file)
Method used to test if a file exists.
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:57
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:176
SVMData()
Definition: SVMWrapper.h:62
Definition: SVMWrapper.h:184
the gamma parameter of the POLY, RBF and SIGMOID kernel
Definition: SVMWrapper.h:182
Definition: SVMWrapper.h:183
std::vector< std::vector< DoubleReal > > gauss_tables_
Definition: SVMWrapper.h:532
DoubleReal sigma_
Definition: SVMWrapper.h:529
SVMData(std::vector< std::vector< std::pair< Int, DoubleReal > > > &seqs, std::vector< DoubleReal > &lbls)
Definition: SVMWrapper.h:68
static bool empty(const String &file)
Return true if the file does not exist or the file is empty.
the degree for the polynomial kernel
Definition: SVMWrapper.h:178
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
std::vector< DoubleReal > labels
Definition: SVMWrapper.h:60
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:56
std::vector< std::vector< std::pair< Int, DoubleReal > > > sequences
Definition: SVMWrapper.h:59
bool store(const String &filename) const
Definition: SVMWrapper.h:80
int Int
Signed integer type.
Definition: Types.h:100
static bool readable(const String &file)
Return true if the file exists and is readable.
the nu parameter for nu-SVR
Definition: SVMWrapper.h:180
This class provides some basic file handling methods for text files.
Definition: TextFile.h:47
the kernel type
Definition: SVMWrapper.h:177
bool load(const String &filename)
Definition: SVMWrapper.h:106
SVM_parameter_type
Parameters for the svm to be set from outside.
Definition: SVMWrapper.h:174

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:21 using doxygen 1.8.5