Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
ModelFitter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Clemens Groepl $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 
36 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
37 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
38 
47 #include <OpenMS/CONCEPT/Factory.h>
48 
49 #include <boost/math/special_functions/fpclassify.hpp>
50 
51 #include <iostream>
52 #include <fstream>
53 #include <numeric>
54 #include <cmath>
55 #include <vector>
56 #include <set>
57 
58 namespace OpenMS
59 {
60 
76  template <class PeakType, class FeatureType>
77  class ModelFitter :
78  public FeaFiModule<PeakType, FeatureType>,
79  public FeatureFinderDefs
80  {
81 public:
82 
84  typedef IndexSet::const_iterator IndexSetIter;
96  typedef std::vector<PeakType> RawDataArrayType;
97 
98  enum
99  {
102  };
103 
106  Base(map, features, ff),
107  model2D_(),
108  mz_stat_(),
109  rt_stat_(),
110  monoisotopic_mz_(0),
111 #ifdef DEBUG_FEATUREFINDER
112  counter_(1),
113 #endif
114  iso_stdev_first_(0),
115  iso_stdev_last_(0),
117  first_mz_model_(0),
118  last_mz_model_(0),
119  quality_rt_(0),
120  quality_mz_(0)
121  {
122  this->setName("ModelFitter");
123 
124  this->defaults_.setValue("fit_algorithm", "simple", "Fitting algorithm type (internal parameter).", StringList::create("advanced"));
125  std::vector<String> fit_opts;
126  fit_opts.push_back("simple");
127  fit_opts.push_back("simplest");
128  fit_opts.push_back("wavelet");
129  this->defaults_.setValidStrings("fit_algorithm", fit_opts);
130 
131  this->defaults_.setValue("max_iteration", 500, "Maximum number of iterations for fitting with Levenberg-Marquardt algorithm.", StringList::create("advanced"));
132  this->defaults_.setMinInt("max_iteration", 1);
133  this->defaults_.setValue("deltaAbsError", 0.0001, "Absolute error used by the Levenberg-Marquardt algorithm.", StringList::create("advanced"));
134  this->defaults_.setMinFloat("deltaAbsError", 0.0);
135  this->defaults_.setValue("deltaRelError", 0.0001, "Relative error used by the Levenberg-Marquardt algorithm.", StringList::create("advanced"));
136  this->defaults_.setMinFloat("deltaRelError", 0.0);
137 
138  this->defaults_.setValue("tolerance_stdev_bounding_box", 3.0f, "Bounding box has range [minimim of data, maximum of data] enlarged by tolerance_stdev_bounding_box times the standard deviation of the data", StringList::create("advanced"));
139  this->defaults_.setMinFloat("tolerance_stdev_bounding_box", 0.0);
140 
141  this->defaults_.setValue("intensity_cutoff_factor", 0.05f, "Cutoff peaks with a predicted intensity below intensity_cutoff_factor times the maximal intensity of the model");
142  this->defaults_.setMinFloat("intensity_cutoff_factor", 0.0);
143  this->defaults_.setMaxFloat("intensity_cutoff_factor", 1.0);
144 
145  this->defaults_.setValue("feature_intensity_sum", 1, "Determines what is reported as feature intensity.\n1: the sum of peak intensities;\n0: the maximum intensity of all peaks", StringList::create("advanced"));
146  this->defaults_.setMinInt("feature_intensity_sum", 0);
147  this->defaults_.setMaxInt("feature_intensity_sum", 1);
148 
149  this->defaults_.setValue("min_num_peaks:final", 5, "Minimum number of peaks left after cutoff. If smaller, feature will be discarded.");
150  this->defaults_.setMinInt("min_num_peaks:final", 1);
151  this->defaults_.setValue("min_num_peaks:extended", 10, "Minimum number of peaks after extension. If smaller, feature will be discarded.");
152  this->defaults_.setMinInt("min_num_peaks:extended", 1);
153  this->defaults_.setSectionDescription("min_num_peaks", "Required number of peaks for a feature.");
154 
155  this->defaults_.setValue("rt:interpolation_step", 0.2f, "Step size in seconds used to interpolate model for RT.");
156  this->defaults_.setMinFloat("rt:interpolation_step", 0.0);
157  this->defaults_.setSectionDescription("rt", "Model settings in RT dimension.");
158 
159  this->defaults_.setValue("mz:interpolation_step", 0.03f, "Interpolation step size for m/z.");
160  this->defaults_.setMinFloat("mz:interpolation_step", 0.001);
161  this->defaults_.setValue("mz:model_type:first", 1, "Numeric id of first m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
162  this->defaults_.setMinInt("mz:model_type:first", 0);
163  this->defaults_.setValue("mz:model_type:last", 4, "Numeric id of last m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
164  this->defaults_.setMinInt("mz:model_type:last", 0);
165  this->defaults_.setSectionDescription("mz", "Model settings in m/z dimension.");
166 
167  this->defaults_.setValue("quality:type", "Correlation", "Type of the quality measure used to assess the fit of model vs data.", StringList::create("advanced"));
168  std::vector<String> quality_opts;
169  quality_opts.push_back("Correlation");
170  quality_opts.push_back("RankCorrelation");
171  this->defaults_.setValidStrings("quality:type", quality_opts);
172  this->defaults_.setValue("quality:minimum", 0.65f, "Minimum quality of fit, features below this threshold are discarded.");
173  this->defaults_.setMinFloat("quality:minimum", 0.0);
174  this->defaults_.setMaxFloat("quality:minimum", 1.0);
175  this->defaults_.setSectionDescription("quality", "Fitting quality settings.");
176 
177  this->defaults_.setValue("isotope_model:stdev:first", 0.04f, "First standard deviation to be considered for isotope model.");
178  this->defaults_.setMinFloat("isotope_model:stdev:first", 0.0);
179  this->defaults_.setValue("isotope_model:stdev:last", 0.12f, "Last standard deviation to be considered for isotope model.");
180  this->defaults_.setMinFloat("isotope_model:stdev:last", 0.0);
181  this->defaults_.setValue("isotope_model:stdev:step", 0.04f, "Step size for standard deviations considered for isotope model.");
182  this->defaults_.setMinFloat("isotope_model:stdev:step", 0.0);
183  this->defaults_.setSectionDescription("isotope_model:stdev", "Instrument resolution settings for m/z dimension.");
184 
185  this->defaults_.setValue("isotope_model:averagines:C", 0.04443989f, "Number of C atoms per Dalton of the mass.", StringList::create("advanced"));
186  this->defaults_.setMinFloat("isotope_model:averagines:C", 0.0);
187  this->defaults_.setValue("isotope_model:averagines:H", 0.06981572f, "Number of H atoms per Dalton of the mass.", StringList::create("advanced"));
188  this->defaults_.setMinFloat("isotope_model:averagines:H", 0.0);
189  this->defaults_.setValue("isotope_model:averagines:N", 0.01221773f, "Number of N atoms per Dalton of the mass.", StringList::create("advanced"));
190  this->defaults_.setMinFloat("isotope_model:averagines:N", 0.0);
191  this->defaults_.setValue("isotope_model:averagines:O", 0.01329399f, "Number of O atoms per Dalton of the mass.", StringList::create("advanced"));
192  this->defaults_.setMinFloat("isotope_model:averagines:O", 0.0);
193  this->defaults_.setValue("isotope_model:averagines:S", 0.00037525f, "Number of S atoms per Dalton of the mass.", StringList::create("advanced"));
194  this->defaults_.setMinFloat("isotope_model:averagines:S", 0.0);
195  this->defaults_.setSectionDescription("isotope_model:averagines", "Averagines are used to approximate the number of atoms (C,H,N,O,S) which a peptide of a given mass contains.");
196 
197  this->defaults_.setValue("isotope_model:isotope:trim_right_cutoff", 0.001f, "Cutoff for averagine distribution, trailing isotopes below this relative intensity are not considered.", StringList::create("advanced"));
198  this->defaults_.setMinFloat("isotope_model:isotope:trim_right_cutoff", 0.0);
199  this->defaults_.setValue("isotope_model:isotope:maximum", 100, "Maximum number of isotopes being used for the IsotopeModel.", StringList::create("advanced"));
200  this->defaults_.setMinInt("isotope_model:isotope:maximum", 1);
201  this->defaults_.setValue("isotope_model:isotope:distance", 1.000495f, "Distance between consecutive isotopic peaks.", StringList::create("advanced"));
202  this->defaults_.setMinFloat("isotope_model:isotope:distance", 0.0);
203  this->defaults_.setSectionDescription("isotope_model", "Settings of the isotope model (m/z).");
204 
205  this->defaultsToParam_();
206  }
207 
209  virtual ~ModelFitter()
210  {
211  }
212 
216  {
217  monoisotopic_mz_ = mz;
218  }
219 
226  Feature fit(const ChargedIndexSet & index_set)
227  {
228  // Test the number of peaks (not enough peaks to fit)
229  if (index_set.size() < UInt(this->param_.getValue("min_num_peaks:extended")))
230  {
231  String mess = String("Skipping feature, IndexSet size too small: ") + index_set.size();
232  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-IndexSet", mess.c_str());
233  }
234 
235  // Calculate statistics for mz and rt
237  (
238  Internal::IntensityIterator<ModelFitter>(index_set.begin(), this),
239  Internal::IntensityIterator<ModelFitter>(index_set.end(), this),
240  Internal::MzIterator<ModelFitter>(index_set.begin(), this)
241  );
243  (
244  Internal::IntensityIterator<ModelFitter>(index_set.begin(), this),
245  Internal::IntensityIterator<ModelFitter>(index_set.end(), this),
246  Internal::RtIterator<ModelFitter>(index_set.begin(), this)
247  );
248 
249  // set charge
250  if (index_set.charge != 0)
251  {
252  first_mz_model_ = index_set.charge;
253  last_mz_model_ = index_set.charge;
254  }
255 
256  if (first_mz_model_ > last_mz_model_) throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "ModelFitter::fit(): charge range tested is not valid; check \"model_type:first\" and \"model_type:last\" ");
257 
258  // Check charge estimate if charge is not specified by user
259 #ifdef DEBUG_FEATUREFINDER
260  std::cout << "Checking charge state from " << first_mz_model_ << " to " << last_mz_model_ << std::endl;
261 #endif
262 
263  // ** Projection
264  doProjectionDim_(RT, index_set, rt_input_data_);
265  doProjectionDim_(MZ, index_set, mz_input_data_);
266  // Fit rt model
267  InterpolationModel * model_rt = 0;
268  quality_rt_ = fitRT_(model_rt);
269  model2D_.setModel(RT, model_rt); // Set model in 2D-model
270 
271  FeatureMap<Feature> feature_collection;
272 
273  for (ChargeType charge = first_mz_model_; charge <= last_mz_model_; ++charge)
274  {
275  Feature f;
276  try
277  {
278  // "reset" model2D!!
279  model2D_.setScale(1.);
280 
281  // TODO: intensity scaling should use a more robust estimator (rather than comparing max of data vs. model)
282 
283  // Compute model with the best correlation
284  // result is in model2D_
285  QualityType max_quality = fitMZLoop_(index_set, charge);
286 
287  // find peak with highest predicted intensity to use as cutoff
288  IntensityType model_max = 0;
289 
290  for (IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
291  {
292  IntensityType model_int = model2D_.getIntensity(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it)));
293  if (model_int > model_max) model_max = model_int;
294  }
295  model2D_.setCutOff(model_max * Real(this->param_.getValue("intensity_cutoff_factor")));
296 
297  // Cutoff low intensities wrt to model maximum -> cutoff independent of scaling
298  IndexSet model_set;
299  for (IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
300  {
301  if (model2D_.isContained(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it))))
302  {
303  model_set.insert(*it);
304  }
305  }
306 
307  // Print number of selected peaks after cutoff
308 #ifdef DEBUG_FEATUREFINDER
309  std::cout << " Selected " << model_set.size() << " from " << index_set.size() << " peaks.\n";
310 #endif
311 
312  // Calculate intensity scaling
313  IntensityType model_sum = 0;
314  //IntensityType data_sum = 0;
315  IntensityType data_max = 0;
316  for (IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
317  {
318  IntensityType model_int = model2D_.getIntensity(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it)));
319  model_sum += model_int;
320  //data_sum += this->getPeakIntensity( *it );
321  if (this->getPeakIntensity(*it) > data_max) data_max = this->getPeakIntensity(*it);
322  }
323 
324  if (model_sum == 0)
325  {
326  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-ZeroSum", "Skipping feature, model_sum zero.");
327  }
328 
329  //std::cout << "data_max: " << data_max << " model_max: " << model_max << " model2dscale: " << (data_max / model_max) << std::endl;
330  model2D_.setScale(data_max / model_max); // use max quotient instead of sum quotient
331 
332  //std::cout << model2D_.getParameters() << std::endl;
333 
334  // Build Feature
335  // The feature coordinate in rt dimension is given
336  // by the centroid of the rt model whereas the coordinate
337  // in mz dimension is equal to the monoisotopic peak.
339  f.setOverallQuality(max_quality);
340  f.setRT(static_cast<InterpolationModel *>(model2D_.getModel(RT))->getCenter());
341  f.setMZ(static_cast<InterpolationModel *>(model2D_.getModel(MZ))->getCenter());
342 
343  // set and check convex hull whether m/z is contained or not
344  this->addConvexHull(model_set, f);
345  if (!f.encloses(f.getRT(), f.getMZ())) f.setMZ(f.getConvexHull().getBoundingBox().minY());
346 
347  // feature charge ...
348  // if we used a simple Gaussian model to fit the feature, we can't say anything about
349  // its charge state. The value 0 indicates that charge state is undetermined.
350  if (model2D_.getModel(MZ)->getName() == "LmaIsotopeModel")
351  {
352  f.setCharge(static_cast<LmaIsotopeModel *>(model2D_.getModel(MZ))->getCharge());
353  }
354  else if (model2D_.getModel(MZ)->getName() == "IsotopeModel")
355  {
356  f.setCharge(static_cast<IsotopeModel *>(model2D_.getModel(MZ))->getCharge());
357  }
358  else if (model2D_.getModel(MZ)->getName() == "ExtendedIsotopeModel")
359  {
360  f.setCharge(static_cast<ExtendedIsotopeModel *>(model2D_.getModel(MZ))->getCharge());
361  }
362  else
363  {
364  f.setCharge(0);
365  }
366 
367  // feature intensity
368  Int const intensity_choice = this->param_.getValue("feature_intensity_sum");
369  IntensityType feature_intensity = 0.0;
370  if (intensity_choice == 1)
371  {
372  // intensity of the feature is the sum of all included data points
373  for (IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
374  {
375  feature_intensity += this->getPeakIntensity(*it);
376  }
377  }
378  else
379  {
380  // feature intensity is the maximum intensity of all peaks
381  for (IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
382  {
383  if (this->getPeakIntensity(*it) > feature_intensity)
384  {
385  feature_intensity = this->getPeakIntensity(*it);
386  }
387  }
388  }
389 
390  // set intensity
391  f.setIntensity(feature_intensity);
392 
393  // set quality (1D)
396 
397  //std::cout << "QA: " << f.getOverallQuality() << " qMZ: " << f.getQuality(1) << " charge: " << f.getCharge() << " stdev: " << f.getModelDescription().getParam().getValue("MZ:isotope:stdev")<< std::endl;
398 
399 #ifdef DEBUG_FEATUREFINDER
400  // debug output
401  if (this->param_.getValue("fit_algorithm") != "wavelet")
402  {
403  std::cout << "Feature " << counter_ << ": (" << f.getRT() << "," << f.getMZ() << ") Qual.: " << max_quality << std::endl;
404  }
405  // Save meta data in feature for TOPPView
406  f.setMetaValue(3, String(counter_));
407 
408  std::cout << "Feature charge: " << f.getCharge() << std::endl;
409  std::cout << "Feature quality in mz: " << f.getQuality(MZ) << std::endl;
410 
411  // write debug output
412  CoordinateType rt = f.getRT();
413  CoordinateType mz = f.getMZ();
414 
415  // write feature model
416  String fname = String("model") + counter_ + "_" + rt + "_" + mz;
417  std::ofstream file(fname.c_str());
418  for (IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
419  {
420  DPosition<2> pos = DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it));
421  if (model2D_.isContained(pos))
422  {
423  file << pos[RT] << " " << pos[MZ] << " " << model2D_.getIntensity(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it))) << "\n";
424  }
425  }
426  file.close();
427 
428  // wrote peaks remaining after model fit
429  fname = String("feature") + counter_ + "_" + rt + "_" + mz;
430  std::ofstream file2(fname.c_str());
431  for (IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
432  {
433  DPosition<2> pos = DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it));
434  if (model2D_.isContained(pos))
435  {
436  file2 << pos[RT] << " " << pos[MZ] << " " << this->getPeakIntensity(*it) << "\n";
437  }
438  }
439  file2.close();
440 
441  // Count features
442  ++counter_;
443 
444 #endif
445 
446  feature_collection.push_back(f);
447  } // ! try
448  catch (Exception::UnableToFit & /*e*/)
449  {
450  //std::cout << "CAUGHT!! " << e.getName() << " " << e.getMessage() << std::endl;
451  }
452  }
453 
454  if (feature_collection.empty())
455  {
456  String mess = String("Skipping feature, nothing in the feature collection.");
457  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-EmptyFeatureCollection", mess.c_str());
458  }
459 
460  QualityType best_quality = -std::numeric_limits<QualityType>::max();
461  // find best feature
462  std::size_t best_idx = 0;
463  for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
464  {
465  if (best_quality < feature_collection[idx].getOverallQuality())
466  {
467  best_quality = feature_collection[idx].getOverallQuality();
468  best_idx = idx;
469  }
470  }
471 
472  Feature best_feature = feature_collection[best_idx];
473  // check some more conditions
474  // not enough peaks left for feature
475 
476  // fit has too low quality or fit was not possible i.e. because of zero stdev
477  if (best_feature.getOverallQuality() < (Real) (this->param_.getValue("quality:minimum")))
478  {
479  String mess = String("Skipping feature, correlation too small: ") + best_feature.getOverallQuality();
480  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-Correlation", mess.c_str());
481  }
482 
483  // free unused peaks for best feature
484  IndexSet model_set;
485  ProductModel<2> * best_model = static_cast<ProductModel<2> *>(best_feature.getModelDescription().createModel());
486  for (IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
487  {
488  if (best_model->isContained(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it))))
489  {
490  model_set.insert(*it);
491  }
492  else
493  {
494  this->ff_->getPeakFlag(*it) = UNUSED;
495  }
496  }
497  delete best_model;
498  if (model_set.size() < (UInt) (this->param_.getValue("min_num_peaks:final")))
499  {
500  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-FinalSet", String("Skipping feature, IndexSet size after cutoff too small: ") + model_set.size());
501  }
502 
503  // add all but the best feature to the suboptimal ones
504  for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
505  {
506  if (idx == best_idx) continue;
507  best_feature.getSubordinates().push_back(feature_collection[idx]);
508  }
509 
510  // return "best" feature
511  return best_feature;
512 
513  }
514 
515 protected:
516 
517  virtual void updateMembers_()
518  {
519  algorithm_ = this->param_.getValue("fit_algorithm");
520 
521  max_iteration_ = this->param_.getValue("max_iteration");
522  deltaAbsError_ = this->param_.getValue("deltaAbsError");
523  deltaRelError_ = this->param_.getValue("deltaRelError");
524 
525  tolerance_stdev_box_ = this->param_.getValue("tolerance_stdev_bounding_box");
526  max_isotope_ = this->param_.getValue("isotope_model:isotope:maximum");
527 
528  interpolation_step_mz_ = this->param_.getValue("mz:interpolation_step");
529  interpolation_step_rt_ = this->param_.getValue("rt:interpolation_step");
530 
531  iso_stdev_first_ = this->param_.getValue("isotope_model:stdev:first");
532  iso_stdev_last_ = this->param_.getValue("isotope_model:stdev:last");
533  iso_stdev_stepsize_ = this->param_.getValue("isotope_model:stdev:step");
534 
535  first_mz_model_ = (Int) this->param_.getValue("mz:model_type:first");
536  last_mz_model_ = (Int) this->param_.getValue("mz:model_type:last");
537  }
538 
540  QualityType fitMZLoop_(const ChargedIndexSet & set, const ChargeType & charge)
541  {
542 
543  // Fit mz model ... test different charge states and stdevs
544  QualityType max_quality_mz = -std::numeric_limits<QualityType>::max();
545 
546  InterpolationModel * best_model_mz = 0;
547  for (Real stdev = iso_stdev_first_; stdev <= iso_stdev_last_; stdev += iso_stdev_stepsize_)
548  {
549  isotope_stdev_ = stdev;
550 
551  InterpolationModel * model_mz = 0;
552  quality_mz_ = fitMZ_(model_mz, charge);
553 
554 
555  if (quality_mz_ > max_quality_mz)
556  {
557  max_quality_mz = quality_mz_;
558  if (best_model_mz) delete best_model_mz;
559  best_model_mz = model_mz;
560  }
561  else
562  {
563  delete model_mz;
564  }
565  }
566 
567  model2D_.setModel(MZ, best_model_mz); // Set model in 2D-model
568 
569  quality_mz_ = max_quality_mz;
570 
571  // return overall quality
572  return evaluate_(set);
573  }
574 
576  QualityType evaluate_(const IndexSet & set) const
577  {
578  QualityType quality = 0.0;
579 
580  // Calculate the pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
581  if (algorithm_ != "")
582  {
583  std::vector<Real> real_data;
584  real_data.reserve(set.size());
585  std::vector<Real> model_data;
586  model_data.reserve(set.size());
587 
588  for (IndexSet::const_iterator it = set.begin(); it != set.end(); ++it)
589  {
590  real_data.push_back(this->getPeakIntensity(*it));
591  model_data.push_back(model2D_.getIntensity(DPosition<2>(this->getPeakRt(*it), this->getPeakMz(*it))));
592  }
593 
594  if (this->param_.getValue("quality:type") == "RankCorrelation")
595  {
596  quality = Math::rankCorrelationCoefficient(real_data.begin(), real_data.end(), model_data.begin(), model_data.end());
597  }
598  else quality = Math::pearsonCorrelationCoefficient(real_data.begin(), real_data.end(), model_data.begin(), model_data.end());
599  }
600 
601  if (boost::math::isnan(quality)) quality = -1.0;
602 
603  return quality;
604  }
605 
608  {
609  QualityType quality;
610  Param param;
611  Fitter1D * fitter;
612 
613  if (algorithm_ == "simplest") // Fit with BiGauss
614  {
615  param.setValue("tolerance_stdev_bounding_box", tolerance_stdev_box_);
616  param.setValue("statistics:mean", rt_stat_.mean());
617  param.setValue("statistics:variance", rt_stat_.variance());
618  param.setValue("statistics:variance1", rt_stat_.variance1());
619  param.setValue("statistics:variance2", rt_stat_.variance2());
620  param.setValue("interpolation_step", interpolation_step_rt_);
621 
622  fitter = Factory<Fitter1D>::create("BiGaussFitter1D");
623  }
624  else // Fit with EMG (LM optimization)
625  {
626  param.setValue("tolerance_stdev_bounding_box", tolerance_stdev_box_);
627  param.setValue("statistics:mean", rt_stat_.mean());
628  param.setValue("statistics:variance", rt_stat_.variance());
629  param.setValue("interpolation_step", interpolation_step_rt_);
630  param.setValue("max_iteration", max_iteration_);
631  param.setValue("deltaAbsError", deltaAbsError_);
632  param.setValue("deltaRelError", deltaRelError_);
633 
634  fitter = Factory<Fitter1D>::create("EmgFitter1D");
635  }
636 
637  // Set parameter for fitter
638  fitter->setParameters(param);
639 
640  // Construct model for rt
641  quality = fitter->fit1d(rt_input_data_, model);
642  // Check quality
643  if (boost::math::isnan(quality)) quality = -1.0;
644 
645  delete(fitter);
646 
647  return quality;
648  }
649 
651  QualityType fitMZ_(InterpolationModel * & model, const ChargeType & charge) const
652  {
653  QualityType quality;
654  Param param;
655  Fitter1D * fitter;
656 
657  param.setValue("tolerance_stdev_bounding_box", tolerance_stdev_box_);
658  param.setValue("statistics:mean", mz_stat_.mean());
659  param.setValue("statistics:variance", mz_stat_.variance());
660  param.setValue("interpolation_step", interpolation_step_mz_);
661 
662  if (monoisotopic_mz_ != 0) // monoisotopic mz is known
663  {
664  param.setValue("statistics:mean", monoisotopic_mz_);
665  }
666 
667  if (charge != 0) // charge is not zero
668  {
669  param.setValue("charge", charge);
670  param.setValue("isotope:stdev", isotope_stdev_);
671  param.setValue("isotope:maximum", max_isotope_);
672  fitter = Factory<Fitter1D>::create("IsotopeFitter1D");
673  }
674  else // charge is zero
675  {
676  if (algorithm_ == "simplest") // Fit with GaussModel
677  {
678  param.setValue("charge", charge);
679  param.setValue("isotope:stdev", isotope_stdev_);
680  param.setValue("isotope:maximum", max_isotope_);
681  fitter = Factory<Fitter1D>::create("IsotopeFitter1D");
682  }
683  else // Fit with LmaGaussModel
684  {
685  param.setValue("max_iteration", max_iteration_);
686  param.setValue("deltaAbsError", deltaAbsError_);
687  param.setValue("deltaRelError", deltaRelError_);
688  fitter = Factory<Fitter1D>::create("LmaGaussFitter1D");
689  }
690  }
691 
692  // Set parameter for fitter
693  fitter->setParameters(param);
694 
695  // Construct model for mz
696  quality = fitter->fit1d(mz_input_data_, model);
697 
698  //std::cout << "model after fitting: " << model->getParameters() << std::endl << std::endl;
699 
700  // Check quality
701  if (boost::math::isnan(quality)) quality = -1.0;
702 
703  delete(fitter);
704 
705  return quality;
706  }
707 
709  void doProjectionDim_(const Int dim, const ChargedIndexSet & index_set, RawDataArrayType & set) const
710  {
711 
712  if (algorithm_ != "")
713  {
714  std::map<CoordinateType, CoordinateType> data_map;
715 
716  if (dim == MZ)
717  {
718  for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
719  {
720  data_map[this->getPeakMz(*it)] += this->getPeakIntensity(*it);
721  }
722  }
723  else
724  {
725  for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
726  {
727  data_map[this->getPeakRt(*it)] += this->getPeakIntensity(*it);
728  }
729  }
730 
731  // Copy the raw data into set
732  set.resize(data_map.size());
733  std::map<CoordinateType, CoordinateType>::iterator it;
734  UInt i = 0;
735  for (it = data_map.begin(); it != data_map.end(); ++it, ++i)
736  {
737  set[i].setPosition((*it).first);
738  set[i].setIntensity((*it).second);
739  }
740 
741  }
742 
743  }
744 
759 #ifdef DEBUG_FEATUREFINDER
760  UInt counter_;
762 #endif
796 
797 private:
798 
800  ModelFitter();
802  ModelFitter & operator=(const ModelFitter &);
804  ModelFitter(const ModelFitter &);
805 
806  };
807 
808 }
809 
810 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
ProductModel & setModel(UInt dim, BaseModel< 1 > *dist)
set model dist for dimension dim
Definition: ProductModel.h:184
void setMetaValue(const String &name, const DataValue &value)
sets the DataValue corresponding to a name
const ChargeType & getCharge() const
Non-mutable access to charge state.
void update(ProbabilityIterator const probability_begin, ProbabilityIterator const probability_end, CoordinateIterator const coordinate_begin)
You can call this as often as you like, using different input vectors.
Definition: AsymmetricStatistics.h:94
Implements a module of the FeatureFinder algorithm.
Definition: FeaFiModule.h:157
Int first_mz_model_
first mz model (0=Gaussian, 1....n = charge )
Definition: ModelFitter.h:776
CoordinateType tolerance_stdev_box_
tolerance used for bounding box
Definition: ModelFitter.h:756
RealType variance2() const
&quot;variance to the right hand side&quot;
Definition: AsymmetricStatistics.h:87
DoubleReal CoordinateType
Coordinate type (of the position)
Definition: Peak2D.h:65
Definition: ModelFitter.h:100
Real IntensityType
Intensity type.
Definition: Peak2D.h:63
ModelFitter()
Not implemented.
RawDataArrayType mz_input_data_
mz raw data
Definition: ModelFitter.h:752
float Real
Real type.
Definition: Types.h:109
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:155
Feature::ChargeType ChargeType
Isotope charge.
Definition: ModelFitter.h:92
CoordinateType iso_stdev_stepsize_
step size
Definition: ModelFitter.h:774
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:56
const ModelDescription< 2 > & getModelDescription() const
Non-mutable access to the model description.
CoordinateType getPeakRt(const FeatureFinderDefs::IndexPair &index) const
Returns the retention time of a peak.
Definition: FeaFiModule.h:210
CoordinateType deltaAbsError_
Absolute error.
Definition: ModelFitter.h:787
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
IntensityType getPeakIntensity(const FeatureFinderDefs::IndexPair &index) const
Returns the intensity of a peak.
Definition: FeaFiModule.h:190
IntensityType getIntensity(const PositionType &pos) const
intensity equals product of intensities in each dimension
Definition: ProductModel.h:151
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:148
Int ChargeType
Type of charge values.
Definition: BaseFeature.h:64
Definition: FeatureFinderDefs.h:63
ConvexHull2D & getConvexHull() const
Returns the overall convex hull of the feature (calculated from the convex hulls of the mass traces) ...
RealType variance() const
Returns the variance.
Definition: BasicStatistics.h:175
Retention time dimension id (0 if used as a const int)
Definition: Peak2D.h:76
PeakType::IntensityType IntensityType
Input intensity type.
Definition: FeaFiModule.h:168
RealType mean() const
Returns the mean.
Definition: BasicStatistics.h:171
A container for features.
Definition: FeatureMap.h:111
virtual QualityType fit1d(const RawDataArrayType &, InterpolationModel *&)
return interpolation model
Definition: Fitter1D.h:95
unsigned int UInt
Unsigned integer type.
Definition: Types.h:92
Feature::QualityType QualityType
Quality of a feature.
Definition: ModelFitter.h:86
std::vector< PeakType > RawDataArrayType
Raw data container type using for the temporary storage of the input data.
Definition: ModelFitter.h:96
CoordinateType deltaRelError_
Relative error.
Definition: ModelFitter.h:789
void update(ProbabilityIterator probability_begin, ProbabilityIterator const probability_end)
This does the actual calculation.
Definition: BasicStatistics.h:114
Internal class for asymmetric distributions.
Definition: AsymmetricStatistics.h:59
ProductModel< 2 > model2D_
2D model
Definition: ModelFitter.h:746
FeaFiModule< PeakType, FeatureType > Base
FeaFiModule.
Definition: ModelFitter.h:94
bool encloses(DoubleReal rt, DoubleReal mz) const
Returns if the mass trace convex hulls of the feature enclose the position specified by rt and mz...
void setSectionDescription(const String &key, const String &description)
Sets a description for an existing section.
void setMonoIsotopicMass(CoordinateType mz)
Sets or fixed the monoisotopic m/z at a specific position.
Definition: ModelFitter.h:215
Mass-to-charge dimension id (1 if used as a const int)
Definition: Peak2D.h:77
void setModelDescription(const ModelDescription< 2 > &q)
Set the model description.
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:203
IndexSet::const_iterator IndexSetIter
IndexSet iterator.
Definition: ModelFitter.h:84
The class template is only implemented for D=2 because we use Peak2D here.
Definition: ProductModel.h:65
Calculates some basic statistical parameters of a distribution: sum, mean, variance, and provides the normal approximation.
Definition: BasicStatistics.h:68
Int max_iteration_
Maximum number of iterations.
Definition: ModelFitter.h:784
static FactoryProduct * create(const String &name)
return FactoryProduct according to unique identifier name
Definition: Factory.h:113
void setParameters(const Param &param)
Sets the parameters.
FeatureFinder * ff_
Pointer to the calling FeatureFinder that is used to access the feature flags and report progress...
Definition: FeaFiModule.h:415
The purpose of this struct is to provide definitions of classes and typedefs which are used throughou...
Definition: FeatureFinderDefs.h:51
Feature::CoordinateType CoordinateType
Single coordinate.
Definition: ModelFitter.h:88
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:167
QualityType quality_rt_
fit quality in RT dimension
Definition: ModelFitter.h:793
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
QualityType fitMZLoop_(const ChargedIndexSet &set, const ChargeType &charge)
main fit loop
Definition: ModelFitter.h:540
RawDataArrayType rt_input_data_
rt raw data
Definition: ModelFitter.h:754
Abstract class for 1D-models that are approximated using linear interpolation.
Definition: InterpolationModel.h:55
BaseModel< D > * createModel()
Definition: ModelDescription.h:97
QualityType fitRT_(InterpolationModel *&model) const
1d fit in RT
Definition: ModelFitter.h:607
CoordinateType interpolation_step_rt_
interpolation step size (in retention time)
Definition: ModelFitter.h:766
CoordinateType getPeakMz(const FeatureFinderDefs::IndexPair &index) const
Returns the m/z of a peak.
Definition: FeaFiModule.h:200
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:209
virtual void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: ModelFitter.h:517
const String & getName() const
Non-mutable access to the name.
static DoubleReal pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:285
Math::BasicStatistics basic_stat_
statistics
Definition: ModelFitter.h:791
CoordinateType interpolation_step_mz_
interpolation step size (in m/z)
Definition: ModelFitter.h:764
BaseModel< 1 > * getModel(UInt dim) const
Definition: ProductModel.h:204
const std::vector< Feature > & getSubordinates() const
immutable access to subordinate features
QualityType getQuality(Size index) const
Non-mutable access to the quality in dimension c.
ModelFitter(const MSExperiment< PeakType > *map, FeatureMap< FeatureType > *features, FeatureFinder *ff)
Constructor.
Definition: ModelFitter.h:105
void setQuality(Size index, QualityType q)
Set the quality in dimension c.
IsotopeCluster::IndexSet IndexSet
A set of peak indices.
Definition: FeatureFinderDefs.h:60
void addConvexHull(const FeatureFinderDefs::IndexSet &set, Feature &feature) const
Calculates the convex hull of a index set and adds it to the feature.
Definition: FeaFiModule.h:393
Retention time iterator for a FeatureFinderDefs::IndexSet.
Definition: FeaFiModule.h:132
CoordinateType isotope_stdev_
isotope stdev
Definition: ModelFitter.h:780
CoordinateType monoisotopic_mz_
monoistopic mass
Definition: ModelFitter.h:758
RealType variance1() const
&quot;variance to the left hand side&quot;
Definition: AsymmetricStatistics.h:81
static StringList create(const String &list, const char splitter= ',')
Returns a list that is created by splitting the given (comma-separated) string (String are not trimme...
Tests a group of data points in an LC-MS map for goodness-of-fit with a 2D averagine model...
Definition: ModelFitter.h:77
String algorithm_
algorithm
Definition: ModelFitter.h:782
Real QualityType
Type of quality values.
Definition: BaseFeature.h:62
DBoundingBox< 2 > getBoundingBox() const
returns the bounding box of the feature hull points
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
QualityType evaluate_(const IndexSet &set) const
evaluate 2d-model
Definition: ModelFitter.h:576
An LC-MS feature.
Definition: Feature.h:66
void setScale(IntensityType scale)
set the intensity scaling factor
Definition: ProductModel.h:217
static DoubleReal rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:379
Int max_isotope_
maximum isotopic rank to be considered
Definition: ModelFitter.h:768
void setValidStrings(const String &key, const std::vector< String > &strings)
Sets the valid strings for the parameter key.
Management and storage of parameters / INI files.
Definition: Param.h:69
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:191
Definition: ModelFitter.h:101
Representation of a mass spectrometry experiment.
Definition: MSExperiment.h:68
m/z iterator for a FeatureFinderDefs::IndexSet
Definition: FeaFiModule.h:110
QualityType fitMZ_(InterpolationModel *&model, const ChargeType &charge) const
1d fit in MZ
Definition: ModelFitter.h:651
QualityType quality_mz_
fit quality in MZ dimension
Definition: ModelFitter.h:795
Intensity iterator for a FeatureFinderDefs::IndexSet.
Definition: FeaFiModule.h:88
void setOverallQuality(QualityType q)
Set the overall quality.
Int last_mz_model_
last mz model
Definition: ModelFitter.h:778
void doProjectionDim_(const Int dim, const ChargedIndexSet &index_set, RawDataArrayType &set) const
Project the raw data into 1-dim array.
Definition: ModelFitter.h:709
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:662
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
virtual bool isContained(const PositionType &pos) const
check if position pos is part of the model regarding the models cut-off.
Definition: BaseModel.h:101
index set with associated charge estimate
Definition: IsotopeCluster.h:53
Abstract base class for all 1D-dimensional model fitter.
Definition: Fitter1D.h:59
CoordinateType iso_stdev_last_
last stdev
Definition: ModelFitter.h:772
void setCharge(const ChargeType &ch)
Set charge state.
#define DEBUG_FEATUREFINDER
Definition: EGHFitter1D.h:38
Int charge
charge estimate (convention: zero means &quot;no charge estimate&quot;)
Definition: IsotopeCluster.h:62
The main feature finder class.
Definition: FeatureFinder.h:57
void setName(const String &name)
Mutable access to the name.
Math::AsymmetricStatistics rt_stat_
statistics for rt
Definition: ModelFitter.h:750
virtual ~ModelFitter()
Destructor.
Definition: ModelFitter.h:209
const Flag & getPeakFlag(const IndexPair &index) const
Returns a non-mutable reference to a peak flag.
Definition: FeatureFinder.h:91
Math::BasicStatistics mz_stat_
statistics for mz
Definition: ModelFitter.h:748
Feature::IntensityType IntensityType
Single intensity.
Definition: ModelFitter.h:90
int Int
Signed integer type.
Definition: Types.h:100
void setMinFloat(const String &key, DoubleReal min)
Sets the minimum value for the floating point or floating point list parameter key.
Feature fit(const ChargedIndexSet &index_set)
Return next feature.
Definition: ModelFitter.h:226
virtual void setCutOff(IntensityType cut_off)
set cutoff value
Definition: BaseModel.h:135
ModelFitter & operator=(const ModelFitter &)
Not implemented.
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
void setMaxFloat(const String &key, DoubleReal max)
Sets the maximum value for the floating point or floating point list parameter key.
CoordinateType iso_stdev_first_
first stdev
Definition: ModelFitter.h:770

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:17 using doxygen 1.8.5