Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
ChromatogramExtractor.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_CHROMATOGRAMEXTRACTOR_H
36 #define OPENMS_ANALYSIS_OPENSWATH_CHROMATOGRAMEXTRACTOR_H
37 
40 
41 // move to TOPPTool
43 #include <OpenMS/FORMAT/MzMLFile.h>
44 
45 #ifdef _OPENMP
46 #include <omp.h>
47 #endif
48 
49 namespace OpenMS
50 {
51 
61  class OPENMS_DLLAPI ChromatogramExtractor :
62  public ProgressLogger
63  {
64 
65 public:
66 
69  {
70  }
71 
74  {
75  }
76 
78  template <typename ExperimentT>
79  void extractChromatograms(const ExperimentT& input, ExperimentT& output, OpenMS::TargetedExperiment& transition_exp, double extract_window, bool ppm,
80  TransformationDescription trafo, double rt_extraction_window, String filter)
81  {
82 
83  // invert the trafo because we want to transform nRT values to "real" RT values
84  trafo.invert();
85 
86  Size input_size = input.size();
87  if (input_size < 1)
88  {
89  return;
90  }
91  SpectrumSettings settings = input[0];
92  int used_filter = -1;
93  if (filter == "tophat")
94  {
95  used_filter = 1;
96  }
97  else if (filter == "bartlett")
98  {
99  used_filter = 2;
100  }
101  else
102  {
103  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
104  "Filter either needs to be tophat or bartlett");
105  }
106 
107  // Store the peptide retention times in an intermediate map
108  PeptideRTMap_.clear();
109  for (Size i = 0; i < transition_exp.getPeptides().size(); i++)
110  {
111  const TargetedExperiment::Peptide& pep = transition_exp.getPeptides()[i];
112  if (pep.rts.empty() || pep.rts[0].getCVTerms()["MS:1000896"].empty())
113  {
114  // we dont have retention times -> this is only a problem if we actually
115  // wanted to use the RT limit feature.
116  if (rt_extraction_window >= 0)
117  {
118  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
119  "Error: Peptide " + pep.id + " does not have normalized retention times (term 1000896) which are necessary to perform an RT-limited extraction");
120  }
121  continue;
122  }
123  PeptideRTMap_[pep.id] = pep.rts[0].getCVTerms()["MS:1000896"][0].getValue().toString().toDouble();
124  }
125 
126  // sort the transition experiment by product mass
127  // this is essential because the algorithm assumes sorted transitions!
128  transition_exp.sortTransitionsByProductMZ();
129 
130  // prepare all the spectra (but leave them empty)
131  std::vector<typename ExperimentT::ChromatogramType> chromatograms;
132  prepareSpectra_(settings, chromatograms, transition_exp);
133 
134  //go through all spectra
135  startProgress(0, input_size, "Extracting chromatograms");
136  for (Size scan_idx = 0; scan_idx < input_size; ++scan_idx)
137  {
138  setProgress(scan_idx);
139 
140  if (input[scan_idx].size() == 0)
141  continue;
142 
143  Size peak_idx = 0;
144 
145  double mz;
146  double integrated_intensity = 0;
147 
148  // go through all transitions / chromatograms which are sorted by
149  // ProductMZ. We can use this to step through the spectrum and at the
150  // same time step through the transitions. We increase the peak counter
151  // until we hit the next transition and then extract the signal.
152  for (Size k = 0; k < chromatograms.size(); ++k)
153  {
154 
155  double current_rt = input[scan_idx].getRT();
156  if (outsideExtractionWindow_(transition_exp.getTransitions()[k], current_rt, trafo, rt_extraction_window))
157  {
158  continue;
159  }
160 
162  mz = transition_exp.getTransitions()[k].getProductMZ();
163 
164  if (used_filter == 1)
165  {
166  extract_value_tophat(input[scan_idx], mz, peak_idx, integrated_intensity, extract_window, ppm);
167  }
168  else if (used_filter == 2)
169  {
170  extract_value_bartlett(input[scan_idx], mz, peak_idx, integrated_intensity, extract_window, ppm);
171  }
172 
173  p.setRT(current_rt);
174  p.setIntensity(integrated_intensity);
175  chromatograms[k].push_back(p);
176  }
177  }
178  endProgress();
179 
180  // add all the chromatograms to the output
181  output.setChromatograms(chromatograms);
182  }
183 
184 public:
185 
186  template <typename SpectrumT>
187  void extract_value_tophat(const SpectrumT& input, const double& mz, Size& peak_idx, double& integrated_intensity, const double& extract_window, const bool ppm)
188  {
189  integrated_intensity = 0;
190  if (input.size() == 0)
191  {
192  return;
193  }
194 
195  // calculate extraction window
196  double left, right;
197  if (ppm)
198  {
199  left = mz - mz * extract_window / 2.0 * 1.0e-6;
200  right = mz + mz * extract_window / 2.0 * 1.0e-6;
201  }
202  else
203  {
204  left = mz - extract_window / 2.0;
205  right = mz + extract_window / 2.0;
206  }
207 
208  Size walker;
209 
210  // advance the peak_idx until we hit the m/z value of the next transition
211  while (peak_idx < input.size() && input[peak_idx].getMZ() < mz)
212  {
213  peak_idx++;
214  }
215 
216  integrated_intensity = 0;
217 
218  // walk right and left and add to our intensity
219  walker = peak_idx;
220  // if we moved past the end of the spectrum, we need to try the last peak of the spectrum (it could still be within the window)
221  if (peak_idx >= input.size())
222  {
223  walker = input.size() - 1;
224  }
225 
226  // add the current peak if it is between right and left
227  if (input[walker].getMZ() > left && input[walker].getMZ() < right)
228  {
229  integrated_intensity += input[walker].getIntensity();
230  }
231 
232  // walk to the right until we go outside the window, then walk to the left until we are outside the window
233  walker = peak_idx;
234  if (walker > 0)
235  {
236  walker--;
237  }
238  while (walker > 0 && input[walker].getMZ() > left && input[walker].getMZ() < right)
239  {
240  integrated_intensity += input[walker].getIntensity(); walker--;
241  }
242  walker = peak_idx;
243  if (walker < input.size() )
244  {
245  walker++;
246  }
247  while (walker<input.size() && input[walker].getMZ()> left && input[walker].getMZ() < right)
248  {
249  integrated_intensity += input[walker].getIntensity(); walker++;
250  }
251  }
252 
253  template <typename SpectrumT>
254  void extract_value_bartlett(const SpectrumT& input, const double& mz, Size& peak_idx, double& integrated_intensity, const double& extract_window, const bool ppm)
255  {
256  integrated_intensity = 0;
257  if (input.size() == 0)
258  {
259  return;
260  }
261 
262  // calculate extraction window
263  double left, right, half_window_size, weight;
264  if (ppm)
265  {
266  half_window_size = mz * extract_window / 2.0 * 1.0e-6;
267  left = mz - mz * extract_window / 2.0 * 1.0e-6;
268  right = mz + mz * extract_window / 2.0 * 1.0e-6;
269  }
270  else
271  {
272  half_window_size = extract_window / 2.0;
273  left = mz - extract_window / 2.0;
274  right = mz + extract_window / 2.0;
275  }
276 
277  Size walker;
278 
279  // advance the peak_idx until we hit the m/z value of the next transition
280  while (peak_idx < input.size() && input[peak_idx].getMZ() < mz)
281  {
282  peak_idx++;
283  }
284 
285 
286  // walk right and left and add to our intensity
287  walker = peak_idx;
288  // if we moved past the end of the spectrum, we need to try the last peak of the spectrum (it could still be within the window)
289  if (peak_idx >= input.size())
290  {
291  walker = input.size() - 1;
292  }
293 
294  // add the current peak if it is between right and left
295  if (input[walker].getMZ() > left && input[walker].getMZ() < right)
296  {
297  weight = 1 - fabs(input[walker].getMZ() - mz) / half_window_size;
298  integrated_intensity += input[walker].getIntensity() * weight;
299  }
300 
301  // walk to the right until we go outside the window, then walk to the left until we are outside the window
302  walker = peak_idx;
303  if (walker > 0 )
304  {
305  walker--;
306  }
307  while (walker > 0 && input[walker].getMZ() > left && input[walker].getMZ() < right)
308  {
309  weight = 1 - fabs(input[walker].getMZ() - mz) / half_window_size;
310  integrated_intensity += input[walker].getIntensity() * weight; walker--;
311  }
312  walker = peak_idx;
313  if (walker < input.size() )
314  {
315  walker++;
316  }
317  while (walker<input.size() && input[walker].getMZ()> left && input[walker].getMZ() < right)
318  {
319  weight = 1 - fabs(input[walker].getMZ() - mz) / half_window_size;
320  integrated_intensity += input[walker].getIntensity() * weight; walker++;
321  }
322  }
323 
324  void extract_value_tophat(const std::vector<double>::const_iterator& mz_start, std::vector<double>::const_iterator& mz_it,
325  const std::vector<double>::const_iterator& mz_end, std::vector<double>::const_iterator& int_it,
326  const double& mz, double& integrated_intensity, double& extract_window, bool ppm)
327  {
328  integrated_intensity = 0;
329  if (mz_start == mz_end)
330  {
331  return;
332  }
333 
334  // calculate extraction window
335  double left, right;
336  if (ppm)
337  {
338  left = mz - mz * extract_window / 2.0 * 1.0e-6;
339  right = mz + mz * extract_window / 2.0 * 1.0e-6;
340  }
341  else
342  {
343  left = mz - extract_window / 2.0;
344  right = mz + extract_window / 2.0;
345  }
346 
347  std::vector<double>::const_iterator mz_walker;
348  std::vector<double>::const_iterator int_walker;
349 
350  // advance the mz / int iterator until we hit the m/z value of the next transition
351  while (mz_it != mz_end && (*mz_it) < mz)
352  {
353  mz_it++; int_it++;
354  }
355 
356  // walk right and left and add to our intensity
357  mz_walker = mz_it;
358  int_walker = int_it;
359 
360  // if we moved past the end of the spectrum, we need to try the last peak of the spectrum (it could still be within the window)
361  if (mz_it == mz_end)
362  {
363  mz_walker--; int_walker--;
364  }
365 
366  // add the current peak if it is between right and left
367  if ((*mz_walker) > left && (*mz_walker) < right)
368  {
369  integrated_intensity += (*int_walker);
370  }
371 
372  // walk to the right until we go outside the window, then walk to the left until we are outside the window
373  mz_walker = mz_it;
374  int_walker = int_it;
375  if (mz_it != mz_start)
376  {
377  mz_walker--;
378  int_walker--;
379  }
380  while (mz_walker != mz_start && (*mz_walker) > left && (*mz_walker) < right)
381  {
382  integrated_intensity += (*int_walker); mz_walker--; int_walker--;
383  }
384  mz_walker = mz_it;
385  int_walker = int_it;
386  if (mz_it != mz_end)
387  {
388  mz_walker++;
389  int_walker++;
390  }
391  while (mz_walker != mz_end && (*mz_walker) > left && (*mz_walker) < right)
392  {
393  integrated_intensity += (*int_walker); mz_walker++; int_walker++;
394  }
395  }
396 
397 private:
398 
400  template <class SpectrumSettingsT, class ChromatogramT>
401  void prepareSpectra_(SpectrumSettingsT& settings, std::vector<ChromatogramT>& chromatograms, OpenMS::TargetedExperiment& transition_exp)
402  {
403 
404  // first prepare all the spectra (but leave them empty)
405  for (Size i = 0; i < transition_exp.getTransitions().size(); i++)
406  {
407  const ReactionMonitoringTransition* transition = &transition_exp.getTransitions()[i];
408 
409  ChromatogramT chrom;
410  // Create precursor and set
411  // 1) the target m/z
412  // 2) the isolation window (upper/lower)
413  // 3) the peptide sequence
414  Precursor prec;
415  prec.setMZ(transition->getPrecursorMZ());
416  if (settings.getPrecursors().size() > 0)
417  {
418  prec.setIsolationWindowLowerOffset(settings.getPrecursors()[0].getIsolationWindowLowerOffset());
419  prec.setIsolationWindowUpperOffset(settings.getPrecursors()[0].getIsolationWindowUpperOffset());
420  }
421 
422  //set precursor sequence
423  String pepref = transition->getPeptideRef();
424  for (Size pep_idx = 0; pep_idx < transition_exp.getPeptides().size(); pep_idx++)
425  {
426  const OpenMS::TargetedExperiment::Peptide* pep = &transition_exp.getPeptides()[pep_idx];
427  if (pep->id == pepref)
428  {
429  prec.setMetaValue("peptide_sequence", pep->sequence);
430  break;
431  }
432  }
433  // add precursor to spectrum
434  chrom.setPrecursor(prec);
435 
436  // Create product and set its m/z
437  Product prod;
438  prod.setMZ(transition->getProductMZ());
439  chrom.setProduct(prod);
440 
441  // Set the rest of the meta-data
442  chrom.setInstrumentSettings(settings.getInstrumentSettings());
443  chrom.setAcquisitionInfo(settings.getAcquisitionInfo());
444  chrom.setSourceFile(settings.getSourceFile());
445 
446  for (Size i = 0; i < settings.getDataProcessing().size(); ++i)
447  {
448  DataProcessing dp = settings.getDataProcessing()[i];
449  dp.setMetaValue("performed_on_spectra", "true");
450  chrom.getDataProcessing().push_back(dp);
451  }
452 
453  // Set the id of the chromatogram, using the id of the transition (this gives directly the mapping of the two)
454  chrom.setNativeID(transition->getNativeID());
456  chromatograms.push_back(chrom);
457  }
458 
459  }
460 
461  bool outsideExtractionWindow_(const ReactionMonitoringTransition& transition, double current_rt,
462  const TransformationDescription& trafo, double rt_extraction_window)
463  {
464  if (rt_extraction_window < 0)
465  {
466  return false;
467  }
468 
469  // Get the expected retention time, apply the RT-transformation
470  // (which describes the normalization) and then take the difference.
471  // Note that we inverted the transformation in the beginning because
472  // we want to transform from normalized to real RTs here and not the
473  // other way round.
474  double expected_rt = PeptideRTMap_[transition.getPeptideRef()];
475  double de_normalized_experimental_rt = trafo.apply(expected_rt);
476  if (current_rt < de_normalized_experimental_rt - rt_extraction_window || current_rt > de_normalized_experimental_rt + rt_extraction_window)
477  {
478  return true;
479  }
480  return false;
481  }
482 
483  std::map<OpenMS::String, double> PeptideRTMap_;
484 
485  };
486 
487 }
488 
489 #endif
ChromatogramExtractor()
Constructor.
Definition: ChromatogramExtractor.h:68
const double k
std::map< OpenMS::String, double > PeptideRTMap_
Definition: ChromatogramExtractor.h:483
Descripton of the applied preprocessing steps.
Definition: DataProcessing.h:51
void setIsolationWindowUpperOffset(DoubleReal bound)
sets the upper offset from the target m/z
void setMetaValue(const String &name, const DataValue &value)
sets the DataValue corresponding to a name
bool outsideExtractionWindow_(const ReactionMonitoringTransition &transition, double current_rt, const TransformationDescription &trafo, double rt_extraction_window)
Definition: ChromatogramExtractor.h:461
void extract_value_tophat(const SpectrumT &input, const double &mz, Size &peak_idx, double &integrated_intensity, const double &extract_window, const bool ppm)
Definition: ChromatogramExtractor.h:187
A more convenient string class.
Definition: String.h:56
Precursor meta information.
Definition: Precursor.h:56
Product meta information.
Definition: Product.h:49
Peak2D PeakType
Definition: MassTrace.h:49
void setIsolationWindowLowerOffset(DoubleReal bound)
sets the lower offset from the target m/z
void setMZ(DoubleReal mz)
sets the target m/z
Representation of 1D spectrum settings.
Definition: SpectrumSettings.h:64
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:114
void sortTransitionsByProductMZ()
Lexicographically sorts the transitions by their product m/z.
std::vector< RetentionTime > rts
Definition: TargetedExperimentHelper.h:333
const String & getPeptideRef() const
A method or algorithm argument contains illegal values.
Definition: Exception.h:634
void extractChromatograms(const ExperimentT &input, ExperimentT &output, OpenMS::TargetedExperiment &transition_exp, double extract_window, bool ppm, TransformationDescription trafo, double rt_extraction_window, String filter)
Extract chromatograms defined by the TargetedExperiment from the input map and write them to the outp...
Definition: ChromatogramExtractor.h:79
String id
Definition: TargetedExperimentHelper.h:334
const std::vector< ReactionMonitoringTransition > & getTransitions() const
returns the transition list
DoubleReal apply(DoubleReal value) const
Applies the transformation to value.
~ChromatogramExtractor()
Destructor.
Definition: ChromatogramExtractor.h:73
The ChromatogramExtractor extracts chromatograms from a mzML file.
Definition: ChromatogramExtractor.h:61
String sequence
Definition: TargetedExperimentHelper.h:337
void prepareSpectra_(SpectrumSettingsT &settings, std::vector< ChromatogramT > &chromatograms, OpenMS::TargetedExperiment &transition_exp)
This populates the chromatograms vector with empty chromatograms (but sets their meta-information) ...
Definition: ChromatogramExtractor.h:401
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
Base class for all classes that want to report their progess.
Definition: ProgressLogger.h:56
This class stores an prediction of an SRM/MRM transition.
Definition: TargetedExperiment.h:53
const String & getNativeID() const
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:59
void extract_value_bartlett(const SpectrumT &input, const double &mz, Size &peak_idx, double &integrated_intensity, const double &extract_window, const bool ppm)
Definition: ChromatogramExtractor.h:254
void extract_value_tophat(const std::vector< double >::const_iterator &mz_start, std::vector< double >::const_iterator &mz_it, const std::vector< double >::const_iterator &mz_end, std::vector< double >::const_iterator &int_it, const double &mz, double &integrated_intensity, double &extract_window, bool ppm)
Definition: ChromatogramExtractor.h:324
const std::vector< Peptide > & getPeptides() const
void invert()
Computes an (approximate) inverse of the transformation.
Definition: TargetedExperimentHelper.h:211
This class stores a SRM/MRM transition.
Definition: ReactionMonitoringTransition.h:53

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:11 using doxygen 1.8.5