Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
PeakPickerHiRes.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Erhan Kenar $
32 // --------------------------------------------------------------------------
33 
34 #ifndef OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERHIRES_H
35 #define OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERHIRES_H
36 
41 
43 
44 #include <gsl/gsl_spline.h>
45 #include <gsl/gsl_interp.h>
46 
47 #include <map>
48 
49 
50 #define DEBUG_PEAK_PICKING
51 #undef DEBUG_PEAK_PICKING
52 //#undef DEBUG_DECONV
53 namespace OpenMS
54 {
68  class OPENMS_DLLAPI PeakPickerHiRes :
69  public DefaultParamHandler,
70  public ProgressLogger
71  {
72 public:
75 
77  virtual ~PeakPickerHiRes();
78 
82  template <typename PeakType>
83  void pick(const MSSpectrum<PeakType> & input, MSSpectrum<PeakType> & output) const
84  {
85  // copy meta data of the input spectrum
86  output.clear(true);
87  output.SpectrumSettings::operator=(input);
88  output.MetaInfoInterface::operator=(input);
89  output.setRT(input.getRT());
90  output.setMSLevel(input.getMSLevel());
91  output.setName(input.getName());
93 
94  // don't pick a spectrum with less than 5 data points
95  if (input.size() < 5) return;
96 
97  // signal-to-noise estimation
99 
100  if (signal_to_noise_ > 0.0)
101  {
102  snt.init(input);
103  }
104 
105  // find local maxima in raw data
106  for (Size i = 2; i < input.size() - 2; ++i)
107  {
108  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
109  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
110  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
111 
112  // MZ spacing sanity checks
113  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
114  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
115  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
116 
117  double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0;
118 
119  if (signal_to_noise_ > 0.0)
120  {
121  act_snt = snt.getSignalToNoise(input[i]);
122  act_snt_l1 = snt.getSignalToNoise(input[i - 1]);
123  act_snt_r1 = snt.getSignalToNoise(input[i + 1]);
124  }
125 
126  // look for peak cores meeting MZ and intensity/SNT criteria
127  if (act_snt >= signal_to_noise_
128  && left_to_central < 1.5 * min_spacing
129  && central_peak_int > left_neighbor_int
130  && act_snt_l1 >= signal_to_noise_
131  && central_to_right < 1.5 * min_spacing
132  && central_peak_int > right_neighbor_int
133  && act_snt_r1 >= signal_to_noise_)
134  {
135  // special case: if a peak core is surrounded by more intense
136  // satellite peaks (indicates oscillation rather than
137  // real peaks) -> remove
138 
139  double act_snt_l2 = 0.0, act_snt_r2 = 0.0;
140 
141  if (signal_to_noise_ > 0.0)
142  {
143  act_snt_l2 = snt.getSignalToNoise(input[i - 2]);
144  act_snt_r2 = snt.getSignalToNoise(input[i + 2]);
145  }
146 
147  if ((i > 1
148  && std::fabs(left_neighbor_mz - input[i - 2].getMZ()) < 1.5 * min_spacing
149  && left_neighbor_int < input[i - 2].getIntensity()
150  && act_snt_l2 >= signal_to_noise_)
151  &&
152  ((i + 2) < input.size()
153  && std::fabs(input[i + 2].getMZ() - right_neighbor_mz) < 1.5 * min_spacing
154  && right_neighbor_int < input[i + 2].getIntensity()
155  && act_snt_r2 >= signal_to_noise_)
156  )
157  {
158  ++i;
159  continue;
160  }
161 
162 
163  std::map<double, double> peak_raw_data;
164 
165  peak_raw_data[central_peak_mz] = central_peak_int;
166  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
167  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
168 
169 
170  // peak core found, now extend it
171  // to the left
172  Size k = 2;
173 
174  Size missing_left(0);
175  Size missing_right(0);
176 
177  while ((i - k + 1) > 0
178  && (missing_left < 2)
179  && input[i - k].getIntensity() <= peak_raw_data.begin()->second)
180  {
181 
182  double act_snt_lk = 0.0;
183 
184  if (signal_to_noise_ > 0.0)
185  {
186  act_snt_lk = snt.getSignalToNoise(input[i - k]);
187  }
188 
189 
190  if (act_snt_lk >= signal_to_noise_ && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < 1.5 * min_spacing)
191  {
192  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
193  }
194  else
195  {
196  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
197  ++missing_left;
198  }
199 
200  ++k;
201 
202  }
203 
204  // to the right
205  k = 2;
206  while ((i + k) < input.size()
207  && (missing_right < 2)
208  && input[i + k].getIntensity() <= peak_raw_data.rbegin()->second)
209  {
210 
211  double act_snt_rk = 0.0;
212 
213  if (signal_to_noise_ > 0.0)
214  {
215  act_snt_rk = snt.getSignalToNoise(input[i + k]);
216  }
217 
218  if (act_snt_rk >= signal_to_noise_ && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < 1.5 * min_spacing)
219  {
220  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
221  }
222  else
223  {
224  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
225  ++missing_right;
226  }
227 
228  ++k;
229  }
230 
231 
232  // output all raw data points selected for one peak
233  // TODO: #ifdef DEBUG_ ...
234  // for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it) {
235  // PeakType peak;
236  // peak.setMZ(map_it->first);
237  // peak.setIntensity(map_it->second);
238  // output.push_back(peak);
239  // std::cout << map_it->first << " " << map_it->second << " snt: " << std::endl;
240  // }
241  // std::cout << "--------------------" << std::endl;
242 
243  const Size num_raw_points = peak_raw_data.size();
244 
245  std::vector<double> raw_mz_values;
246  std::vector<double> raw_int_values;
247 
248  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
249  {
250  raw_mz_values.push_back(map_it->first);
251  raw_int_values.push_back(map_it->second);
252  }
253 
254  // setup gsl splines
255  gsl_interp_accel * spline_acc = gsl_interp_accel_alloc();
256  gsl_interp_accel * first_deriv_acc = gsl_interp_accel_alloc();
257  gsl_spline * peak_spline = gsl_spline_alloc(gsl_interp_cspline, num_raw_points);
258  gsl_spline_init(peak_spline, &(*raw_mz_values.begin()), &(*raw_int_values.begin()), num_raw_points);
259 
260 
261  // calculate maximum by evaluating the spline's 1st derivative
262  // (bisection method)
263  double max_peak_mz = central_peak_mz, max_peak_int = central_peak_int;
264  double threshold = 0.000001;
265  double lefthand = left_neighbor_mz;
266  double righthand = right_neighbor_mz;
267 
268  bool lefthand_sign = 1;
269  double eps = std::numeric_limits<double>::epsilon();
270 
271 
272  // bisection
273  do
274  {
275  double mid = (lefthand + righthand) / 2;
276 
277  double midpoint_deriv_val = gsl_spline_eval_deriv(peak_spline, mid, first_deriv_acc);
278 
279  // if deriv nearly zero then maximum already found
280  if (!(std::fabs(midpoint_deriv_val) > eps))
281  {
282  break;
283  }
284 
285  bool midpoint_sign = (midpoint_deriv_val < 0.0) ? 0 : 1;
286 
287  if (lefthand_sign ^ midpoint_sign)
288  {
289  righthand = mid;
290  }
291  else
292  {
293  lefthand = mid;
294  }
295 
296  // TODO: #ifdef DEBUG_ ...
297  // PeakType peak;
298  // peak.setMZ(mid);
299  // peak.setIntensity(gsl_spline_eval(peak_spline, mid, spline_acc));
300  // output.push_back(peak);
301 
302  }
303  while (std::fabs(lefthand - righthand) > threshold);
304 
305  // sanity check?
306  max_peak_mz = (lefthand + righthand) / 2;
307  max_peak_int = gsl_spline_eval(peak_spline, max_peak_mz, spline_acc);
308 
309  // save picked pick into output spectrum
310  PeakType peak;
311  peak.setMZ(max_peak_mz);
312  peak.setIntensity(max_peak_int);
313  output.push_back(peak);
314 
315  // free allocated gsl memory
316  gsl_spline_free(peak_spline);
317  gsl_interp_accel_free(spline_acc);
318  gsl_interp_accel_free(first_deriv_acc);
319 
320  // jump over raw data points that have been considered already
321  i = i + k - 1;
322  }
323  }
324 
325  return;
326  }
327 
328  template <typename PeakType>
329  void pick(const MSChromatogram<PeakType> & input, MSChromatogram<PeakType> & output) const
330  {
331  // copy meta data of the input chromatogram
332  output.clear(true);
333  output.ChromatogramSettings::operator=(input);
334  output.MetaInfoInterface::operator=(input);
335  output.setName(input.getName());
336 
337  MSSpectrum<PeakType> input_spectrum;
338  MSSpectrum<PeakType> output_spectrum;
339  for (typename MSChromatogram<PeakType>::const_iterator it = input.begin(); it != input.end(); ++it)
340  {
341  input_spectrum.push_back(*it);
342  }
343  pick(input_spectrum, output_spectrum);
344  for (typename MSSpectrum<PeakType>::const_iterator it = output_spectrum.begin(); it != output_spectrum.end(); ++it)
345  {
346  output.push_back(*it);
347  }
348 
349  }
350 
354  template <typename PeakType, typename ChromatogramPeakT>
356  {
357  // make sure that output is clear
358  output.clear(true);
359 
360  // copy experimental settings
361  static_cast<ExperimentalSettings &>(output) = input;
362 
363  // resize output with respect to input
364  output.resize(input.size());
365 
366  bool ms1_only = param_.getValue("ms1_only").toBool();
367  Size progress = 0;
368 
369  startProgress(0, input.size() + input.getChromatograms().size(), "smoothing data");
370  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
371  {
372  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
373  {
374  output[scan_idx] = input[scan_idx];
375  }
376  else
377  {
378  pick(input[scan_idx], output[scan_idx]);
379  }
380  setProgress(++progress);
381  }
382  for (Size i = 0; i < input.getChromatograms().size(); ++i)
383  {
385  pick(input.getChromatograms()[i], chromatogram);
386  output.addChromatogram(chromatogram);
387  setProgress(++progress);
388  }
389 
390  endProgress();
391 
392  return;
393  }
394 
395 protected:
396  // signal-to-noise parameter
398 
399  //docu in base class
400  void updateMembers_();
401 
402  }; // end PeakPickerHiRes
403 
404 } // namespace OpenMS
405 
406 #endif
const double k
virtual double getSignalToNoise(const PeakIterator &data_point)
Definition: SignalToNoiseEstimator.h:130
const String & getName() const
Definition: MSChromatogram.h:194
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:55
UInt getMSLevel() const
Returns the MS level.
Definition: MSSpectrum.h:231
Size size() const
Definition: MSExperiment.h:117
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
The representation of a chromatogram.
Definition: MSChromatogram.h:53
const String & getName() const
Returns the name.
Definition: MSSpectrum.h:243
void setName(const String &name)
Sets the name.
Definition: MSSpectrum.h:249
void resize(Size s)
Definition: MSExperiment.h:122
void setName(const String &name)
Sets the name.
Definition: MSChromatogram.h:200
void addChromatogram(const MSChromatogram< ChromatogramPeakType > &chromatogram)
adds a chromatogram to the list
Definition: MSExperiment.h:762
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:167
virtual void init(const PeakIterator &it_begin, const PeakIterator &it_end)
Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimat...
Definition: SignalToNoiseEstimator.h:111
void pick(const MSSpectrum< PeakType > &input, MSSpectrum< PeakType > &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
Definition: PeakPickerHiRes.h:83
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: MSSpectrum.h:237
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSSpectrum.h:605
Representation of a mass spectrometry experiment.
Definition: MSExperiment.h:68
double signal_to_noise_
Definition: PeakPickerHiRes.h:397
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:71
void setType(SpectrumType type)
sets the spectrum type
void setRT(DoubleReal rt)
Sets the absolute retention time (is seconds)
Definition: MSSpectrum.h:221
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:809
Base class for all classes that want to report their progess.
Definition: ProgressLogger.h:56
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:90
DoubleReal getRT() const
Definition: MSSpectrum.h:215
void pickExperiment(const MSExperiment< PeakType, ChromatogramPeakT > &input, MSExperiment< PeakType, ChromatogramPeakT > &output) const
Applies the peak-picking algorithm to a map (MSExperiment). This method picks peaks for each scan in ...
Definition: PeakPickerHiRes.h:355
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSChromatogram.h:564
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:68
Description of the experimental settings.
Definition: ExperimentalSettings.h:59
const std::vector< MSChromatogram< ChromatogramPeakType > > & getChromatograms() const
returns the chromatogram list
Definition: MSExperiment.h:768
void pick(const MSChromatogram< PeakType > &input, MSChromatogram< PeakType > &output) const
Definition: PeakPickerHiRes.h:329

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:19 using doxygen 1.8.5