Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
37 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
38 
39 
43 #include <vector>
44 
45 namespace OpenMS
46 {
70  template <typename Container = MSSpectrum<> >
72  public SignalToNoiseEstimator<Container>
73  {
74 
75 public:
76 
79 
86 
89 
91 
94  {
95  //set the name for DefaultParamHandler error messages
96  this->setName("SignalToNoiseEstimatorMedian");
97 
98  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
99  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
100  " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
101  " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
102  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
103  " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", StringList::create("advanced"));
104  defaults_.setMinInt("max_intensity", -1);
105 
106  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", StringList::create("advanced"));
107  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
108  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
109 
110  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", StringList::create("advanced"));
111  defaults_.setMinInt("auto_max_percentile", 0);
112  defaults_.setMaxInt("auto_max_percentile", 100);
113 
114  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", StringList::create("advanced"));
115  defaults_.setMinInt("auto_mode", -1);
116  defaults_.setMaxInt("auto_mode", 1);
117 
118  defaults_.setValue("win_len", 200.0, "window length in Thomson");
119  defaults_.setMinFloat("win_len", 1.0);
120 
121  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
122  defaults_.setMinInt("bin_count", 3);
123 
124  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
125  defaults_.setMinInt("min_required_elements", 1);
126 
127  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", StringList::create("advanced"));
128 
129 
131  }
132 
135  SignalToNoiseEstimator<Container>(source)
136  {
137  updateMembers_();
138  }
139 
145  {
146  if (&source == this) return *this;
147 
149  updateMembers_();
150  return *this;
151  }
152 
154 
155 
158  {}
159 
160 
161 protected:
162 
163 
169  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_)
170  {
171  // reset counter for sparse windows
172  double sparse_window_percent = 0;
173  // reset counter for histogram overflow
174  double histogram_oob_percent = 0;
175 
176  // reset the results
177  stn_estimates_.clear();
178 
179  // maximal range of histogram needs to be calculated first
180  if (auto_mode_ == AUTOMAXBYSTDEV)
181  {
182  // use MEAN+auto_max_intensity_*STDEV as threshold
183  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
184  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
185  }
186  else if (auto_mode_ == AUTOMAXBYPERCENT)
187  {
188  // get value at "auto_max_percentile_"th percentile
189  // we use a histogram approach here as well.
190  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
191  {
193  throw Exception::InvalidValue(__FILE__,
194  __LINE__,
195  __PRETTY_FUNCTION__,
196  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
197  s);
198  }
199 
200  std::vector<int> histogram_auto(100, 0);
201 
202  // find maximum of current scan
203  int size = 0;
204  typename PeakType::IntensityType maxInt = 0;
205  PeakIterator run = scan_first_;
206  while (run != scan_last_)
207  {
208  maxInt = std::max(maxInt, (*run).getIntensity());
209  ++size;
210  ++run;
211  }
212 
213  double bin_size = maxInt / 100;
214 
215  // fill histogram
216  run = scan_first_;
217  while (run != scan_last_)
218  {
219  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
220  ++run;
221  }
222 
223  // add up element counts in histogram until ?th percentile is reached
224  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
225  int elements_seen = 0;
226  int i = -1;
227  run = scan_first_;
228 
229  while (run != scan_last_ && elements_seen < elements_below_percentile)
230  {
231  ++i;
232  elements_seen += histogram_auto[i];
233  ++run;
234  }
235 
236  max_intensity_ = (((double)i) + 0.5) * bin_size;
237  }
238  else //if (auto_mode_ == MANUAL)
239  {
240  if (max_intensity_ <= 0)
241  {
243  throw Exception::InvalidValue(__FILE__,
244  __LINE__,
245  __PRETTY_FUNCTION__,
246  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
247  s);
248  }
249  }
250 
251  if (max_intensity_ < 0)
252  {
253  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
254  return;
255  }
256 
257  PeakIterator window_pos_center = scan_first_;
258  PeakIterator window_pos_borderleft = scan_first_;
259  PeakIterator window_pos_borderright = scan_first_;
260 
261  double window_half_size = win_len_ / 2;
262  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
263  int bin_count_minus_1 = bin_count_ - 1;
264 
265  std::vector<int> histogram(bin_count_, 0);
266  std::vector<double> bin_value(bin_count_, 0);
267  // calculate average intensity that is represented by a bin
268  for (int bin = 0; bin < bin_count_; bin++)
269  {
270  histogram[bin] = 0;
271  bin_value[bin] = (bin + 0.5) * bin_size;
272  }
273  // bin in which a datapoint would fall
274  int to_bin = 0;
275 
276  // index of bin where the median is located
277  int median_bin = 0;
278  // additive number of elements from left to x in histogram
279  int element_inc_count = 0;
280 
281  // tracks elements in current window, which may vary because of unevenly spaced data
282  int elements_in_window = 0;
283  // number of windows
284  int window_count = 0;
285 
286  // number of elements where we find the median
287  int element_in_window_half = 0;
288 
289  double noise; // noise value of a datapoint
290 
291  // determine how many elements we need to estimate (for progress estimation)
292  int windows_overall = 0;
293  PeakIterator run = scan_first_;
294  while (run != scan_last_)
295  {
296  ++windows_overall;
297  ++run;
298  }
299  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
300 
301  // MAIN LOOP
302  while (window_pos_center != scan_last_)
303  {
304 
305  // erase all elements from histogram that will leave the window on the LEFT side
306  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
307  {
308  to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
309  --histogram[to_bin];
310  --elements_in_window;
311  ++window_pos_borderleft;
312  }
313 
314  // add all elements to histogram that will enter the window on the RIGHT side
315  while ((window_pos_borderright != scan_last_)
316  && ((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size))
317  {
318  //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
319  to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
320  ++histogram[to_bin];
321  ++elements_in_window;
322  ++window_pos_borderright;
323  }
324 
325  if (elements_in_window < min_required_elements_)
326  {
327  noise = noise_for_empty_window_;
328  ++sparse_window_percent;
329  }
330  else
331  {
332  // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
333  median_bin = -1;
334  element_inc_count = 0;
335  element_in_window_half = (elements_in_window + 1) / 2;
336  while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
337  {
338  ++median_bin;
339  element_inc_count += histogram[median_bin];
340  }
341 
342  // increase the error count
343  if (median_bin == bin_count_minus_1) {++histogram_oob_percent; }
344 
345  // just avoid division by 0
346  noise = std::max(1.0, bin_value[median_bin]);
347  }
348 
349  // store result
350  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
351 
352 
353  // advance the window center by one datapoint
354  ++window_pos_center;
355  ++window_count;
356  // update progress
358 
359  } // end while
360 
362 
363  sparse_window_percent = sparse_window_percent * 100 / window_count;
364  histogram_oob_percent = histogram_oob_percent * 100 / window_count;
365 
366  // warn if percentage of sparse windows is above 20%
367  if (sparse_window_percent > 20)
368  {
369  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
370  << sparse_window_percent
371  << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
372  << std::endl;
373  }
374 
375  // warn if percentage of possibly wrong median estimates is above 1%
376  if (histogram_oob_percent > 1)
377  {
378  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
379  << histogram_oob_percent
380  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
381  << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
382  << std::endl;
383  }
384 
385  } // end of shiftWindow_
386 
389  {
390  max_intensity_ = (double)param_.getValue("max_intensity");
391  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
392  auto_max_percentile_ = param_.getValue("auto_max_percentile");
393  auto_mode_ = param_.getValue("auto_mode");
394  win_len_ = (double)param_.getValue("win_len");
395  bin_count_ = param_.getValue("bin_count");
396  min_required_elements_ = param_.getValue("min_required_elements");
397  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
398  is_result_valid_ = false;
399  }
400 
410  double win_len_;
418 
419 
420 
421  };
422 
423 } // namespace OpenMS
424 
425 #endif //OPENMS_FILTERING_NOISEESTIMATION_DSIGNALTONOISEESTIMATORMEDIAN_H
Real IntensityType
Intensity type.
Definition: Peak2D.h:63
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:155
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMedian.h:88
A more convenient string class.
Definition: String.h:56
double auto_max_percentile_
parameter for initial automatic estimation of &quot;max_intensity_&quot; percentile or a stdev ...
Definition: SignalToNoiseEstimatorMedian.h:406
Definition: SignalToNoiseEstimatorMedian.h:78
int auto_mode_
determines which method shall be used for estimating &quot;max_intensity_&quot;. valid are MANUAL=-1, AUTOMAXBYSTDEV=0 or AUTOMAXBYPERCENT=1
Definition: SignalToNoiseEstimatorMedian.h:408
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:148
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_)
Definition: SignalToNoiseEstimatorMedian.h:169
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMedian.h:412
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:93
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMedian.h:410
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:66
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMedian.h:402
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMedian.h:90
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean &amp; stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:176
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMedian.h:134
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed...
Definition: SignalToNoiseEstimator.h:216
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
void updateMembers_()
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed ...
Definition: SignalToNoiseEstimatorMedian.h:388
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:168
void endProgress() const
Ends the progress display.
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
double auto_max_stdev_Factor_
parameter for initial automatic estimation of &quot;max_intensity_&quot;: a stdev multiplier ...
Definition: SignalToNoiseEstimatorMedian.h:404
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation ...
Definition: SignalToNoiseEstimatorMedian.h:78
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:67
static StringList create(const String &list, const char splitter= ',')
Returns a list that is created by splitting the given (comma-separated) string (String are not trimme...
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMedian.h:417
Definition: SignalToNoiseEstimatorMedian.h:78
Invalid value exception.
Definition: Exception.h:336
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition: SignalToNoiseEstimatorMedian.h:144
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:71
SignalToNoiseEstimatorMedian()
default constructor
Definition: SignalToNoiseEstimatorMedian.h:93
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMedian.h:414
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:58
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:209
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void setProgress(SignedSize value) const
Sets the current progress.
void setName(const String &name)
Mutable access to the name.
virtual ~SignalToNoiseEstimatorMedian()
Destructor.
Definition: SignalToNoiseEstimatorMedian.h:157
Definition: SignalToNoiseEstimatorMedian.h:78
void setMinFloat(const String &key, DoubleReal min)
Sets the minimum value for the floating point or floating point list parameter key.
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMedian.h:87
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
void setMaxFloat(const String &key, DoubleReal max)
Sets the maximum value for the floating point or floating point list parameter key.

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:20 using doxygen 1.8.5