36 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
37 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
49 #include <boost/math/special_functions/fpclassify.hpp>
76 template <
class PeakType,
class FeatureType>
106 Base(map, features, ff),
125 std::vector<String> fit_opts;
126 fit_opts.push_back(
"simple");
127 fit_opts.push_back(
"simplest");
128 fit_opts.push_back(
"wavelet");
138 this->
defaults_.
setValue(
"tolerance_stdev_bounding_box", 3.0f,
"Bounding box has range [minimim of data, maximum of data] enlarged by tolerance_stdev_bounding_box times the standard deviation of the data",
StringList::create(
"advanced"));
141 this->
defaults_.
setValue(
"intensity_cutoff_factor", 0.05f,
"Cutoff peaks with a predicted intensity below intensity_cutoff_factor times the maximal intensity of the model");
145 this->
defaults_.
setValue(
"feature_intensity_sum", 1,
"Determines what is reported as feature intensity.\n1: the sum of peak intensities;\n0: the maximum intensity of all peaks",
StringList::create(
"advanced"));
149 this->
defaults_.
setValue(
"min_num_peaks:final", 5,
"Minimum number of peaks left after cutoff. If smaller, feature will be discarded.");
151 this->
defaults_.
setValue(
"min_num_peaks:extended", 10,
"Minimum number of peaks after extension. If smaller, feature will be discarded.");
155 this->
defaults_.
setValue(
"rt:interpolation_step", 0.2f,
"Step size in seconds used to interpolate model for RT.");
159 this->
defaults_.
setValue(
"mz:interpolation_step", 0.03f,
"Interpolation step size for m/z.");
161 this->
defaults_.
setValue(
"mz:model_type:first", 1,
"Numeric id of first m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
163 this->
defaults_.
setValue(
"mz:model_type:last", 4,
"Numeric id of last m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
168 std::vector<String> quality_opts;
169 quality_opts.push_back(
"Correlation");
170 quality_opts.push_back(
"RankCorrelation");
172 this->
defaults_.
setValue(
"quality:minimum", 0.65f,
"Minimum quality of fit, features below this threshold are discarded.");
177 this->
defaults_.
setValue(
"isotope_model:stdev:first", 0.04f,
"First standard deviation to be considered for isotope model.");
179 this->
defaults_.
setValue(
"isotope_model:stdev:last", 0.12f,
"Last standard deviation to be considered for isotope model.");
181 this->
defaults_.
setValue(
"isotope_model:stdev:step", 0.04f,
"Step size for standard deviations considered for isotope model.");
195 this->
defaults_.
setSectionDescription(
"isotope_model:averagines",
"Averagines are used to approximate the number of atoms (C,H,N,O,S) which a peptide of a given mass contains.");
197 this->
defaults_.
setValue(
"isotope_model:isotope:trim_right_cutoff", 0.001f,
"Cutoff for averagine distribution, trailing isotopes below this relative intensity are not considered.",
StringList::create(
"advanced"));
231 String mess =
String(
"Skipping feature, IndexSet size too small: ") + index_set.size();
232 throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"UnableToFit-IndexSet", mess.c_str());
250 if (index_set.
charge != 0)
259 #ifdef DEBUG_FEATUREFINDER
290 for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
293 if (model_int > model_max) model_max = model_int;
299 for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
303 model_set.insert(*it);
308 #ifdef DEBUG_FEATUREFINDER
309 std::cout <<
" Selected " << model_set.size() <<
" from " << index_set.size() <<
" peaks.\n";
316 for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
319 model_sum += model_int;
326 throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"UnableToFit-ZeroSum",
"Skipping feature, model_sum zero.");
370 if (intensity_choice == 1)
373 for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
381 for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
399 #ifdef DEBUG_FEATUREFINDER
403 std::cout <<
"Feature " << counter_ <<
": (" << f.
getRT() <<
"," << f.
getMZ() <<
") Qual.: " << max_quality << std::endl;
408 std::cout <<
"Feature charge: " << f.
getCharge() << std::endl;
409 std::cout <<
"Feature quality in mz: " << f.
getQuality(
MZ) << std::endl;
416 String fname =
String(
"model") + counter_ +
"_" + rt +
"_" + mz;
417 std::ofstream file(fname.c_str());
418 for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
429 fname =
String(
"feature") + counter_ +
"_" + rt +
"_" + mz;
430 std::ofstream file2(fname.c_str());
431 for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
446 feature_collection.push_back(f);
454 if (feature_collection.empty())
456 String mess =
String(
"Skipping feature, nothing in the feature collection.");
457 throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"UnableToFit-EmptyFeatureCollection", mess.c_str());
460 QualityType best_quality = -std::numeric_limits<QualityType>::max();
462 std::size_t best_idx = 0;
463 for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
465 if (best_quality < feature_collection[idx].getOverallQuality())
467 best_quality = feature_collection[idx].getOverallQuality();
472 Feature best_feature = feature_collection[best_idx];
480 throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"UnableToFit-Correlation", mess.c_str());
486 for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
490 model_set.insert(*it);
500 throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"UnableToFit-FinalSet",
String(
"Skipping feature, IndexSet size after cutoff too small: ") + model_set.size());
504 for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
506 if (idx == best_idx)
continue;
544 QualityType max_quality_mz = -std::numeric_limits<QualityType>::max();
558 if (best_model_mz)
delete best_model_mz;
559 best_model_mz = model_mz;
583 std::vector<Real> real_data;
584 real_data.reserve(set.size());
585 std::vector<Real> model_data;
586 model_data.reserve(set.size());
588 for (IndexSet::const_iterator it = set.begin(); it != set.end(); ++it)
601 if (boost::math::isnan(quality)) quality = -1.0;
643 if (boost::math::isnan(quality)) quality = -1.0;
701 if (boost::math::isnan(quality)) quality = -1.0;
714 std::map<CoordinateType, CoordinateType> data_map;
718 for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
725 for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
732 set.resize(data_map.size());
733 std::map<CoordinateType, CoordinateType>::iterator it;
735 for (it = data_map.begin(); it != data_map.end(); ++it, ++i)
737 set[i].setPosition((*it).first);
738 set[i].setIntensity((*it).second);
759 #ifdef DEBUG_FEATUREFINDER
810 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
ProductModel & setModel(UInt dim, BaseModel< 1 > *dist)
set model dist for dimension dim
Definition: ProductModel.h:184
const ChargeType & getCharge() const
Non-mutable access to charge state.
void update(ProbabilityIterator const probability_begin, ProbabilityIterator const probability_end, CoordinateIterator const coordinate_begin)
You can call this as often as you like, using different input vectors.
Definition: AsymmetricStatistics.h:94
Implements a module of the FeatureFinder algorithm.
Definition: FeaFiModule.h:157
Int first_mz_model_
first mz model (0=Gaussian, 1....n = charge )
Definition: ModelFitter.h:776
CoordinateType tolerance_stdev_box_
tolerance used for bounding box
Definition: ModelFitter.h:756
RealType variance2() const
"variance to the right hand side"
Definition: AsymmetricStatistics.h:87
DoubleReal CoordinateType
Coordinate type (of the position)
Definition: Peak2D.h:65
Definition: ModelFitter.h:100
Real IntensityType
Intensity type.
Definition: Peak2D.h:63
ModelFitter()
Not implemented.
RawDataArrayType mz_input_data_
mz raw data
Definition: ModelFitter.h:752
float Real
Real type.
Definition: Types.h:109
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:155
Feature::ChargeType ChargeType
Isotope charge.
Definition: ModelFitter.h:92
CoordinateType iso_stdev_stepsize_
step size
Definition: ModelFitter.h:774
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:56
const ModelDescription< 2 > & getModelDescription() const
Non-mutable access to the model description.
CoordinateType getPeakRt(const FeatureFinderDefs::IndexPair &index) const
Returns the retention time of a peak.
Definition: FeaFiModule.h:210
CoordinateType deltaAbsError_
Absolute error.
Definition: ModelFitter.h:787
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
IntensityType getPeakIntensity(const FeatureFinderDefs::IndexPair &index) const
Returns the intensity of a peak.
Definition: FeaFiModule.h:190
IntensityType getIntensity(const PositionType &pos) const
intensity equals product of intensities in each dimension
Definition: ProductModel.h:151
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:148
Int ChargeType
Type of charge values.
Definition: BaseFeature.h:64
Definition: FeatureFinderDefs.h:63
ConvexHull2D & getConvexHull() const
Returns the overall convex hull of the feature (calculated from the convex hulls of the mass traces) ...
RealType variance() const
Returns the variance.
Definition: BasicStatistics.h:175
Retention time dimension id (0 if used as a const int)
Definition: Peak2D.h:76
PeakType::IntensityType IntensityType
Input intensity type.
Definition: FeaFiModule.h:168
RealType mean() const
Returns the mean.
Definition: BasicStatistics.h:171
A container for features.
Definition: FeatureMap.h:111
virtual QualityType fit1d(const RawDataArrayType &, InterpolationModel *&)
return interpolation model
Definition: Fitter1D.h:95
unsigned int UInt
Unsigned integer type.
Definition: Types.h:92
Feature::QualityType QualityType
Quality of a feature.
Definition: ModelFitter.h:86
std::vector< PeakType > RawDataArrayType
Raw data container type using for the temporary storage of the input data.
Definition: ModelFitter.h:96
CoordinateType deltaRelError_
Relative error.
Definition: ModelFitter.h:789
void update(ProbabilityIterator probability_begin, ProbabilityIterator const probability_end)
This does the actual calculation.
Definition: BasicStatistics.h:114
Internal class for asymmetric distributions.
Definition: AsymmetricStatistics.h:59
ProductModel< 2 > model2D_
2D model
Definition: ModelFitter.h:746
FeaFiModule< PeakType, FeatureType > Base
FeaFiModule.
Definition: ModelFitter.h:94
bool encloses(DoubleReal rt, DoubleReal mz) const
Returns if the mass trace convex hulls of the feature enclose the position specified by rt and mz...
void setSectionDescription(const String &key, const String &description)
Sets a description for an existing section.
void setMonoIsotopicMass(CoordinateType mz)
Sets or fixed the monoisotopic m/z at a specific position.
Definition: ModelFitter.h:215
Mass-to-charge dimension id (1 if used as a const int)
Definition: Peak2D.h:77
void setModelDescription(const ModelDescription< 2 > &q)
Set the model description.
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:203
IndexSet::const_iterator IndexSetIter
IndexSet iterator.
Definition: ModelFitter.h:84
The class template is only implemented for D=2 because we use Peak2D here.
Definition: ProductModel.h:65
Calculates some basic statistical parameters of a distribution: sum, mean, variance, and provides the normal approximation.
Definition: BasicStatistics.h:68
Int max_iteration_
Maximum number of iterations.
Definition: ModelFitter.h:784
static FactoryProduct * create(const String &name)
return FactoryProduct according to unique identifier name
Definition: Factory.h:113
void setParameters(const Param ¶m)
Sets the parameters.
FeatureFinder * ff_
Pointer to the calling FeatureFinder that is used to access the feature flags and report progress...
Definition: FeaFiModule.h:415
The purpose of this struct is to provide definitions of classes and typedefs which are used throughou...
Definition: FeatureFinderDefs.h:51
Feature::CoordinateType CoordinateType
Single coordinate.
Definition: ModelFitter.h:88
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:167
QualityType quality_rt_
fit quality in RT dimension
Definition: ModelFitter.h:793
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
QualityType fitMZLoop_(const ChargedIndexSet &set, const ChargeType &charge)
main fit loop
Definition: ModelFitter.h:540
RawDataArrayType rt_input_data_
rt raw data
Definition: ModelFitter.h:754
Abstract class for 1D-models that are approximated using linear interpolation.
Definition: InterpolationModel.h:55
BaseModel< D > * createModel()
Definition: ModelDescription.h:97
QualityType fitRT_(InterpolationModel *&model) const
1d fit in RT
Definition: ModelFitter.h:607
CoordinateType interpolation_step_rt_
interpolation step size (in retention time)
Definition: ModelFitter.h:766
CoordinateType getPeakMz(const FeatureFinderDefs::IndexPair &index) const
Returns the m/z of a peak.
Definition: FeaFiModule.h:200
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:209
virtual void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: ModelFitter.h:517
const String & getName() const
Non-mutable access to the name.
static DoubleReal pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:285
Math::BasicStatistics basic_stat_
statistics
Definition: ModelFitter.h:791
CoordinateType interpolation_step_mz_
interpolation step size (in m/z)
Definition: ModelFitter.h:764
BaseModel< 1 > * getModel(UInt dim) const
Definition: ProductModel.h:204
const std::vector< Feature > & getSubordinates() const
immutable access to subordinate features
QualityType getQuality(Size index) const
Non-mutable access to the quality in dimension c.
ModelFitter(const MSExperiment< PeakType > *map, FeatureMap< FeatureType > *features, FeatureFinder *ff)
Constructor.
Definition: ModelFitter.h:105
void setQuality(Size index, QualityType q)
Set the quality in dimension c.
IsotopeCluster::IndexSet IndexSet
A set of peak indices.
Definition: FeatureFinderDefs.h:60
void addConvexHull(const FeatureFinderDefs::IndexSet &set, Feature &feature) const
Calculates the convex hull of a index set and adds it to the feature.
Definition: FeaFiModule.h:393
Retention time iterator for a FeatureFinderDefs::IndexSet.
Definition: FeaFiModule.h:132
CoordinateType isotope_stdev_
isotope stdev
Definition: ModelFitter.h:780
CoordinateType monoisotopic_mz_
monoistopic mass
Definition: ModelFitter.h:758
RealType variance1() const
"variance to the left hand side"
Definition: AsymmetricStatistics.h:81
static StringList create(const String &list, const char splitter= ',')
Returns a list that is created by splitting the given (comma-separated) string (String are not trimme...
Tests a group of data points in an LC-MS map for goodness-of-fit with a 2D averagine model...
Definition: ModelFitter.h:77
String algorithm_
algorithm
Definition: ModelFitter.h:782
Real QualityType
Type of quality values.
Definition: BaseFeature.h:62
DBoundingBox< 2 > getBoundingBox() const
returns the bounding box of the feature hull points
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
QualityType evaluate_(const IndexSet &set) const
evaluate 2d-model
Definition: ModelFitter.h:576
An LC-MS feature.
Definition: Feature.h:66
void setScale(IntensityType scale)
set the intensity scaling factor
Definition: ProductModel.h:217
static DoubleReal rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:379
Int max_isotope_
maximum isotopic rank to be considered
Definition: ModelFitter.h:768
void setValidStrings(const String &key, const std::vector< String > &strings)
Sets the valid strings for the parameter key.
Management and storage of parameters / INI files.
Definition: Param.h:69
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:191
Definition: ModelFitter.h:101
Representation of a mass spectrometry experiment.
Definition: MSExperiment.h:68
m/z iterator for a FeatureFinderDefs::IndexSet
Definition: FeaFiModule.h:110
QualityType fitMZ_(InterpolationModel *&model, const ChargeType &charge) const
1d fit in MZ
Definition: ModelFitter.h:651
QualityType quality_mz_
fit quality in MZ dimension
Definition: ModelFitter.h:795
Intensity iterator for a FeatureFinderDefs::IndexSet.
Definition: FeaFiModule.h:88
void setOverallQuality(QualityType q)
Set the overall quality.
Int last_mz_model_
last mz model
Definition: ModelFitter.h:778
void doProjectionDim_(const Int dim, const ChargedIndexSet &index_set, RawDataArrayType &set) const
Project the raw data into 1-dim array.
Definition: ModelFitter.h:709
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:662
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
virtual bool isContained(const PositionType &pos) const
check if position pos is part of the model regarding the models cut-off.
Definition: BaseModel.h:101
index set with associated charge estimate
Definition: IsotopeCluster.h:53
Abstract base class for all 1D-dimensional model fitter.
Definition: Fitter1D.h:59
CoordinateType iso_stdev_last_
last stdev
Definition: ModelFitter.h:772
void setCharge(const ChargeType &ch)
Set charge state.
#define DEBUG_FEATUREFINDER
Definition: EGHFitter1D.h:38
Int charge
charge estimate (convention: zero means "no charge estimate")
Definition: IsotopeCluster.h:62
The main feature finder class.
Definition: FeatureFinder.h:57
void setName(const String &name)
Mutable access to the name.
Math::AsymmetricStatistics rt_stat_
statistics for rt
Definition: ModelFitter.h:750
virtual ~ModelFitter()
Destructor.
Definition: ModelFitter.h:209
const Flag & getPeakFlag(const IndexPair &index) const
Returns a non-mutable reference to a peak flag.
Definition: FeatureFinder.h:91
Math::BasicStatistics mz_stat_
statistics for mz
Definition: ModelFitter.h:748
Feature::IntensityType IntensityType
Single intensity.
Definition: ModelFitter.h:90
int Int
Signed integer type.
Definition: Types.h:100
void setMinFloat(const String &key, DoubleReal min)
Sets the minimum value for the floating point or floating point list parameter key.
Feature fit(const ChargedIndexSet &index_set)
Return next feature.
Definition: ModelFitter.h:226
virtual void setCutOff(IntensityType cut_off)
set cutoff value
Definition: BaseModel.h:135
ModelFitter & operator=(const ModelFitter &)
Not implemented.
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
void setMaxFloat(const String &key, DoubleReal max)
Sets the maximum value for the floating point or floating point list parameter key.
CoordinateType iso_stdev_first_
first stdev
Definition: ModelFitter.h:770