Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
InternalCalibration.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Alexandra Zerck $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 
36 #ifndef OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
37 #define OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
43 #include <OpenMS/FORMAT/MzMLFile.h>
45 
46 #include <gsl/gsl_fit.h>
47 
48 namespace OpenMS
49 {
50 
62  class OPENMS_DLLAPI InternalCalibration :
63  public DefaultParamHandler,
64  public ProgressLogger
65  {
66 public:
69 
72 
85  template <typename InputPeakType>
86  void calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses);
87 
100  template <typename InputPeakType>
101  void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses, String trafo_file_name = "");
102 
116  template <typename InputPeakType>
117  void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
118 
127  void calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, String trafo_file_name = "");
128 
138  void calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
139 
140 
141 
142  template <typename InputPeakType>
143  void calibrateMapList(std::vector<MSExperiment<InputPeakType> > & exp_list, std::vector<MSExperiment<InputPeakType> > & calibrated_exp_list, std::vector<DoubleReal> & ref_masses, std::vector<DoubleReal> & detected_background_masses);
144 
145 
146 protected:
147 
149  void makeLinearRegression_(std::vector<DoubleReal> & observed_masses, std::vector<DoubleReal> & theoretical_masses);
150 
152  void checkReferenceIds_(std::vector<PeptideIdentification> & pep_ids);
153 
155  void checkReferenceIds_(const FeatureMap<> & feature_map);
156 
158  void applyTransformation_(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map);
159 
162  }; // class InternalCalibration
163 
164 
165  template <typename InputPeakType>
166  void InternalCalibration::calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses)
167  {
168 #ifdef DEBUG_CALIBRATION
169  std::cout.precision(writtenDigits<DoubleReal>());
170 #endif
171  if (exp.empty())
172  {
173  std::cout << "Input is empty." << std::endl;
174  return;
175  }
176 
177  if (exp[0].getType() != SpectrumSettings::PEAKS)
178  {
179  std::cout << "Attention: this function is assuming peak data." << std::endl;
180  }
181  calibrated_exp = exp;
182 
183  Size num_ref_peaks = ref_masses.size();
184  bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
185  DoubleReal mz_tol = param_.getValue("mz_tolerance");
186  startProgress(0, exp.size(), "calibrate spectra");
187  // for each spectrum
188  for (Size spec = 0; spec < exp.size(); ++spec)
189  {
190  // calibrate only MS1 spectra
191  if (exp[spec].getMSLevel() != 1)
192  {
193  continue;
194  }
195 
196 
197  std::vector<DoubleReal> corr_masses, rel_errors, found_ref_masses;
198  UInt corr_peaks = 0;
199  for (Size peak = 0; peak < exp[spec].size(); ++peak)
200  {
201  for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
202  {
203  if (!use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) < mz_tol)
204  {
205  found_ref_masses.push_back(ref_masses[ref_peak]);
206  corr_masses.push_back(exp[spec][peak].getMZ());
207  ++corr_peaks;
208  break;
209  }
210  else if (use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 < mz_tol)
211  {
212  found_ref_masses.push_back(ref_masses[ref_peak]);
213  corr_masses.push_back(exp[spec][peak].getMZ());
214  ++corr_peaks;
215  break;
216  }
217  }
218  }
219  if (corr_peaks < 2)
220  {
221  std::cout << "spec: " << spec
222  << " less than 2 reference masses were detected within a reasonable error range\n";
223  std::cout << "This spectrum cannot be calibrated!\n";
224  continue;
225  }
226 
227  // determine rel error in ppm for the two reference masses
228  for (Size ref_peak = 0; ref_peak < found_ref_masses.size(); ++ref_peak)
229  {
230  rel_errors.push_back((found_ref_masses[ref_peak] - corr_masses[ref_peak]) / corr_masses[ref_peak] * 1e6);
231  }
232 
233  makeLinearRegression_(corr_masses, found_ref_masses);
234 
235  // now calibrate the whole spectrum
236  for (unsigned int peak = 0; peak < calibrated_exp[spec].size(); ++peak)
237  {
238 #ifdef DEBUG_CALIBRATION
239  std::cout << calibrated_exp[spec][peak].getMZ() << "\t";
240 #endif
241  DoubleReal mz = calibrated_exp[spec][peak].getMZ();
242  mz = trafo_.apply(mz);
243  calibrated_exp[spec][peak].setMZ(mz);
244 #ifdef DEBUG_CALIBRATION
245  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
246 #endif
247 
248  }
249  setProgress(spec);
250  } // for(Size spec=0;spec < exp.size(); ++spec)
251  endProgress();
252  }
253 
254  template <typename InputPeakType>
256  MSExperiment<InputPeakType> & calibrated_exp,
257  std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
258  {
259  bool use_ppm = param_.getValue("mz_tolerance_unit") == "ppm" ? true : false;
260  DoubleReal mz_tolerance = param_.getValue("mz_tolerance");
261  if (exp.empty())
262  {
263  std::cout << "Input is empty." << std::endl;
264  return;
265  }
266 
267  if (exp[0].getType() != SpectrumSettings::PEAKS)
268  {
269  std::cout << "Attention: this function is assuming peak data." << std::endl;
270  }
271  // check if the ids contain meta information about the peak positions
272  checkReferenceIds_(ref_ids);
273 
274  std::vector<DoubleReal> theoretical_masses, observed_masses;
275  for (Size p_id = 0; p_id < ref_ids.size(); ++p_id)
276  {
277  for (Size p_h = 0; p_h < ref_ids[p_id].getHits().size(); ++p_h)
278  {
279  Int charge = ref_ids[p_id].getHits()[p_h].getCharge();
280  DoubleReal theo_mass = ref_ids[p_id].getHits()[p_h].getSequence().getMonoWeight(Residue::Full, charge) / (DoubleReal)charge;
281  // first find corresponding ms1-spectrum
282  typename MSExperiment<InputPeakType>::ConstIterator rt_iter = exp.RTBegin(ref_ids[p_id].getMetaValue("RT"));
283  while (rt_iter != exp.begin() && rt_iter->getMSLevel() != 1)
284  {
285  --rt_iter;
286  }
287  // now find closest peak
288  typename MSSpectrum<InputPeakType>::ConstIterator mz_iter = rt_iter->MZBegin(ref_ids[p_id].getMetaValue("MZ"));
289  //std::cout << mz_iter->getMZ() <<" "<<(DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
290  DoubleReal dist = (DoubleReal)ref_ids[p_id].getMetaValue("MZ") - mz_iter->getMZ();
291  //std::cout << dist << "\t";
292  if ((mz_iter + 1) != rt_iter->end()
293  && fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist)
294  && mz_iter != rt_iter->begin()
295  && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ"))) // if mz_iter +1 has smaller dist than mz_iter and mz_iter-1
296  {
297  if ((use_ppm &&
298  fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
299  (!use_ppm && fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
300  {
301  //std::cout <<(mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
302  observed_masses.push_back((mz_iter + 1)->getMZ());
303  theoretical_masses.push_back(theo_mass);
304  //std::cout << (mz_iter +1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
305  //<< "\tplus 1"<< std::endl;
306  }
307  }
308  else if (mz_iter != rt_iter->begin()
309  && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist)) // if mz_iter-1 has smaller dist than mz_iter
310  {
311  if ((use_ppm &&
312  fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
313  (!use_ppm && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
314  {
315  //std::cout <<(mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
316  observed_masses.push_back((mz_iter - 1)->getMZ());
317  theoretical_masses.push_back(theo_mass);
318  //std::cout << (mz_iter -1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
319  //<< "\tminus 1"<< std::endl;
320  }
321  }
322  else
323  {
324  if ((use_ppm &&
325  fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
326  (!use_ppm && fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
327  {
328 
329  observed_masses.push_back(mz_iter->getMZ());
330  theoretical_masses.push_back(theo_mass);
331 // std::cout <<"\t"<< mz_iter->getMZ() << " ~ "<<theo_mass<< " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
332 // << "\tat mz_iter"<< std::endl;
333  }
334  }
335  }
336  }
337 
338  makeLinearRegression_(observed_masses, theoretical_masses);
339  static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
340  calibrated_exp.resize(exp.size());
341 
342  // for each spectrum
343  for (Size spec = 0; spec < exp.size(); ++spec)
344  {
345  // calibrate only MS1 spectra
346  if (exp[spec].getMSLevel() != 1)
347  {
348  calibrated_exp[spec] = exp[spec];
349  continue;
350  }
351  // copy the spectrum meta data
352  calibrated_exp[spec] = exp[spec];
353 
354  for (unsigned int peak = 0; peak < exp[spec].size(); ++peak)
355  {
356 #ifdef DEBUG_CALIBRATION
357  std::cout << exp[spec][peak].getMZ() << "\t";
358 #endif
359  DoubleReal mz = exp[spec][peak].getMZ();
360  mz = trafo_.apply(mz);
361  calibrated_exp[spec][peak].setMZ(mz);
362 #ifdef DEBUG_CALIBRATION
363  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
364 #endif
365 
366  }
367  } // for(Size spec=0;spec < exp.size(); ++spec)
368  if (trafo_file_name != "")
369  {
370  TransformationXMLFile().store(trafo_file_name, trafo_);
371  }
372  }
373 
374  template <typename InputPeakType>
375  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses, String trafo_file_name)
376  {
377  if (exp.empty())
378  {
379  std::cout << "Input is empty." << std::endl;
380  return;
381  }
382 
383  if (exp[0].getType() != SpectrumSettings::PEAKS)
384  {
385  std::cout << "Attention: this function is assuming peak data." << std::endl;
386  }
387 
388 
389  Size num_ref_peaks = ref_masses.size();
390  bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
391  DoubleReal mz_tol = param_.getValue("mz_tolerance");
392  startProgress(0, exp.size(), "calibrate spectra");
393  std::vector<DoubleReal> corr_masses, rel_errors, found_ref_masses;
394  UInt corr_peaks = 0;
395  // for each spectrum
396  for (Size spec = 0; spec < exp.size(); ++spec)
397  {
398  // calibrate only MS1 spectra
399  if (exp[spec].getMSLevel() != 1)
400  continue;
401  for (Size peak = 0; peak < exp[spec].size(); ++peak)
402  {
403  for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
404  {
405  if (!use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) < mz_tol)
406  {
407  found_ref_masses.push_back(ref_masses[ref_peak]);
408  corr_masses.push_back(exp[spec][peak].getMZ());
409  ++corr_peaks;
410  break;
411  }
412  else if (use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 < mz_tol)
413  {
414  found_ref_masses.push_back(ref_masses[ref_peak]);
415  corr_masses.push_back(exp[spec][peak].getMZ());
416  ++corr_peaks;
417  break;
418  }
419  }
420  }
421  }
422  if (corr_peaks < 2)
423  {
424  std::cout << "Less than 2 reference masses were detected within a reasonable error range\n";
425  std::cout << "This spectrum cannot be calibrated!\n";
426  return;
427  }
428 
429  // calculate the (linear) calibration function
430  makeLinearRegression_(corr_masses, found_ref_masses);
431  static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
432  calibrated_exp.resize(exp.size());
433 
434  // apply the calibration function to each peak
435  for (Size spec = 0; spec < exp.size(); ++spec)
436  {
437  // calibrate only MS1 spectra
438  if (exp[spec].getMSLevel() != 1)
439  {
440  calibrated_exp[spec] = exp[spec];
441  continue;
442  }
443 
444  // copy the spectrum data
445  calibrated_exp[spec] = exp[spec];
446 
447  for (unsigned int peak = 0; peak < exp[spec].size(); ++peak)
448  {
449 #ifdef DEBUG_CALIBRATION
450  std::cout << exp[spec][peak].getMZ() << "\t";
451 #endif
452  DoubleReal mz = exp[spec][peak].getMZ();
453  mz = trafo_.apply(mz);
454  calibrated_exp[spec][peak].setMZ(mz);
455 
456 #ifdef DEBUG_CALIBRATION
457  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
458 #endif
459 
460  }
461  setProgress(spec);
462  } // for(Size spec=0;spec < exp.size(); ++spec)
463  endProgress();
464  if (trafo_file_name != "")
465  {
466  TransformationXMLFile().store(trafo_file_name, trafo_);
467  }
468  }
469 
470 } // namespace OpenMS
471 
472 #endif // OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
void calibrateMapGlobally(const MSExperiment< InputPeakType > &exp, MSExperiment< InputPeakType > &calibrated_exp, std::vector< DoubleReal > &ref_masses, String trafo_file_name="")
Calibrate a peak map using given reference masses with one calibration function for the whole map...
Definition: InternalCalibration.h:375
A more convenient string class.
Definition: String.h:56
Size size() const
Definition: MSExperiment.h:117
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:148
void makeLinearRegression_(std::vector< DoubleReal > &observed_masses, std::vector< DoubleReal > &theoretical_masses)
the actual calibration function
A simple calibration method using linear interpolation of given reference masses. ...
Definition: InternalCalibration.h:62
ContainerType::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSSpectrum.h:125
Iterator begin()
Definition: MSExperiment.h:147
void resize(Size s)
Definition: MSExperiment.h:122
void store(String filename, const TransformationDescription &transformation)
Stores the data in an TransformationXML file.
Iterator end()
Definition: MSExperiment.h:157
ConstIterator RTBegin(CoordinateType rt) const
Fast search for spectrum range begin.
Definition: MSExperiment.h:349
Used to load and store TransformationXML files.
Definition: TransformationXMLFile.h:57
void endProgress() const
Ends the progress display.
~InternalCalibration()
Destructor.
Definition: InternalCalibration.h:71
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
DoubleReal apply(DoubleReal value) const
Applies the transformation to value.
Definition: Residue.h:363
Representation of a mass spectrometry experiment.
Definition: MSExperiment.h:68
void calibrateMapSpectrumwise(const MSExperiment< InputPeakType > &exp, MSExperiment< InputPeakType > &calibrated_exp, std::vector< DoubleReal > &ref_masses)
Calibrate a peak map using given reference masses with a separate calibration function for each spect...
Definition: InternalCalibration.h:166
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:103
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
void checkReferenceIds_(std::vector< PeptideIdentification > &pep_ids)
check if reference ids contain RT and MZ information as meta values
Base class for all classes that want to report their progess.
Definition: ProgressLogger.h:56
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
bool empty() const
Definition: MSExperiment.h:127
void setProgress(SignedSize value) const
Sets the current progress.
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:59
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:90
int Int
Signed integer type.
Definition: Types.h:100
Description of the experimental settings.
Definition: ExperimentalSettings.h:59
TransformationDescription trafo_
here the transformation is stored
Definition: InternalCalibration.h:161
double DoubleReal
Double-precision real type.
Definition: Types.h:118

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:15 using doxygen 1.8.5