Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer$
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
36 #define OPENMS_FILTERING_ID_IDFILTER_H
37 
38 #include <OpenMS/config.h>
43 
44 #include <vector>
45 #include <climits>
46 
47 namespace OpenMS
48 {
61  class OPENMS_DLLAPI IDFilter
62  {
63 public:
64 
/// Default constructor (declared here; the definition is not part of this header).
66  IDFilter();
67 
/// Virtual destructor (declared here; the definition is not part of this header).
69  virtual ~IDFilter();
70 
72  template <class IdentificationType>
73  void filterIdentificationsByThreshold(const IdentificationType& identification, DoubleReal threshold_fraction, IdentificationType& filtered_identification)
74  {
75  typedef typename IdentificationType::HitType HitType;
76  std::vector<HitType> temp_hits;
77  std::vector<HitType> filtered_hits;
78 
79  filtered_identification = identification;
80  filtered_identification.setHits(std::vector<HitType>());
81 
82  for (typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
83  it != identification.getHits().end();
84  ++it)
85  {
86  if (it->getScore() >= threshold_fraction * identification.getSignificanceThreshold())
87  {
88  filtered_hits.push_back(*it);
89  }
90  }
91 
92  if (!filtered_hits.empty())
93  {
94  filtered_identification.setHits(filtered_hits);
95  filtered_identification.assignRanks();
96  }
97  }
98 
106  template <class IdentificationType>
107  void filterIdentificationsByScore(const IdentificationType& identification, DoubleReal threshold_score, IdentificationType& filtered_identification)
108  {
109  typedef typename IdentificationType::HitType HitType;
110  std::vector<HitType> temp_hits;
111  std::vector<HitType> filtered_hits;
112 
113  filtered_identification = identification;
114  filtered_identification.setHits(std::vector<HitType>());
115 
116  for (typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
117  it != identification.getHits().end();
118  ++it)
119  {
120  if (identification.isHigherScoreBetter())
121  {
122  if (it->getScore() >= threshold_score)
123  {
124  filtered_hits.push_back(*it);
125  }
126  }
127  else
128  {
129  if (it->getScore() <= threshold_score)
130  {
131  filtered_hits.push_back(*it);
132  }
133  }
134  }
135 
136  if (!filtered_hits.empty())
137  {
138  filtered_identification.setHits(filtered_hits);
139  filtered_identification.assignRanks();
140  }
141  }
142 
149  template <class IdentificationType>
150  void filterIdentificationsByBestNHits(const IdentificationType& identification, Size n, IdentificationType& filtered_identification)
151  {
152  typedef typename IdentificationType::HitType HitType;
153  std::vector<HitType> temp_hits;
154  std::vector<HitType> filtered_hits;
155  Size count = 0;
156 
157  IdentificationType temp_identification = identification;
158  temp_identification.sort(); // .. by score
159 
160  filtered_identification = identification;
161  filtered_identification.setHits(std::vector<HitType>());
162 
163 
164  typename std::vector<HitType>::const_iterator it = temp_identification.getHits().begin();
165  while (it != temp_identification.getHits().end()
166  && count < n)
167  {
168  filtered_hits.push_back(*it);
169  ++it;
170  ++count;
171  }
172 
173  if (!filtered_hits.empty())
174  {
175  filtered_identification.setHits(filtered_hits);
176  filtered_identification.assignRanks();
177  }
178  }
179 
187  template <class IdentificationType>
188  void filterIdentificationsByBestNToMHits(const IdentificationType& identification, Size n, Size m, IdentificationType& filtered_identification)
189  {
190  if (n > m)
191  {
192  std::swap(n, m);
193  }
194 
195  typedef typename IdentificationType::HitType HitType;
196  std::vector<HitType> filtered_hits;
197 
198  IdentificationType temp_identification = identification;
199  temp_identification.sort(); // .. by score
200 
201  filtered_identification = identification;
202  filtered_identification.setHits(std::vector<HitType>());
203 
204  const std::vector<HitType>& hits = temp_identification.getHits();
205  for (Size i = n - 1; n <= m - 1; ++i)
206  {
207  if (i >= hits.size())
208  {
209  break;
210  }
211  filtered_hits.push_back(hits[i]);
212  }
213 
214  if (!filtered_hits.empty())
215  {
216  filtered_identification.setHits(filtered_hits);
217  filtered_identification.assignRanks();
218  }
219  }
220 
/// Filters the hits of @p identification and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): the name suggests that only the best-scoring hit(s) are kept; what
/// @p strict (default: false) changes is not visible here -- confirm against the implementation.
222  void filterIdentificationsByBestHits(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, bool strict = false);
223 
/// Filters the peptide hits of @p identification with respect to the FASTA entries in @p proteins
/// and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): the matching rule and the effect of @p no_protein_identifiers (default: false)
/// are not visible here -- confirm against the implementation.
227  void filterIdentificationsByProteins(const PeptideIdentification& identification, const std::vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers = false);
228 
/// Filters the protein hits of @p identification with respect to the FASTA entries in @p proteins
/// and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): how hits are matched against the FASTA entries is not visible here -- confirm
/// against the implementation.
232  void filterIdentificationsByProteins(const ProteinIdentification& identification, const std::vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification);
233 
/// Filters the peptide hits of @p identification using the set @p peptides and writes the
/// result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): presumably hits whose sequence is contained in @p peptides are removed;
/// the exact comparison is not visible here -- confirm against the implementation.
235  void filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification, const std::set<String>& peptides, PeptideIdentification& filtered_identification);
236 
/// Filters the peptide hits of @p identification by length, using the bounds @p min_length and
/// @p max_length, and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): whether the bounds are inclusive is not visible here -- confirm.
/// NOTE(review): the default for @p max_length is UINT_MAX (maximum of unsigned int), while
/// the parameter type is Size; where Size is wider than unsigned int the default is not the
/// largest representable Size -- confirm that this is intended.
238  void filterIdentificationsByLength(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, Size min_length, Size max_length = UINT_MAX);
239 
/// Filters the peptide hits of @p identification by @p charge and writes the result to
/// @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): whether @p charge is an exact value or a lower bound is not visible here --
/// confirm against the implementation.
241  void filterIdentificationsByCharge(const PeptideIdentification& identification, Int charge, PeptideIdentification& filtered_identification);
242 
/// Filters the peptide hits of @p identification with respect to their modifications, given the
/// list @p fixed_modifications, and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): presumably only hits carrying a modification that is not listed in
/// @p fixed_modifications are kept -- confirm against the implementation.
244  void filterIdentificationsByVariableModifications(const PeptideIdentification& identification, const std::vector<String>& fixed_modifications, PeptideIdentification& filtered_identification);
245 
/// Filters the protein hits of @p identification using @p peptide_identifications and writes the
/// result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): by its name this drops protein hits that no peptide hit refers to; the
/// exact rule is not visible here -- confirm against the implementation.
/// NOTE(review): @p peptide_identifications is taken by (const) value, so the whole vector is
/// copied on every call. A const reference would avoid that, but requires changing the
/// out-of-line definition together with this declaration.
247  void removeUnreferencedProteinHits(const ProteinIdentification& identification, const std::vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification);
248 
/// Filters the peptide hits of @p identification and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): the name suggests that duplicate hits are removed; what makes two hits
/// equal is not visible here -- confirm against the implementation.
250  void filterIdentificationsUnique(const PeptideIdentification& identification, PeptideIdentification& filtered_identification);
251 
/// Filters the peptide hits of @p identification by m/z error, using the tolerance @p mass_error,
/// and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): presumably @p unit_ppm selects ppm (true) versus an absolute unit (false) for
/// @p mass_error -- confirm against the implementation.
253  void filterIdentificationsByMzError(const PeptideIdentification& identification, DoubleReal mass_error, bool unit_ppm, PeptideIdentification& filtered_identification);
254 
/// Filters the peptide hits of @p identification by a retention-time p-value, using the cut-off
/// @p p_value (default: 0.05), and writes the result to @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): where the per-hit p-value comes from and the direction of the comparison
/// are not visible here -- confirm against the implementation.
262  void filterIdentificationsByRTPValues(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, DoubleReal p_value = 0.05);
263 
/// Filters the peptide hits of @p identification by a first-dimension retention-time p-value,
/// using the cut-off @p p_value (default: 0.05), and writes the result to
/// @p filtered_identification.
/// Declared only; the definition is not part of this header.
/// NOTE(review): where the per-hit p-value comes from and the direction of the comparison
/// are not visible here -- confirm against the implementation.
271  void filterIdentificationsByRTFirstDimPValues(const PeptideIdentification& identification,
272  PeptideIdentification& filtered_identification,
273  DoubleReal p_value = 0.05);
274 
276  template <class PeakT>
277  void filterIdentificationsByThresholds(MSExperiment<PeakT>& experiment, DoubleReal peptide_threshold_fraction, DoubleReal protein_threshold_fraction)
278  {
279  //filter protein hits
280  ProteinIdentification temp_protein_identification;
281  std::vector<ProteinIdentification> filtered_protein_identifications;
282 
283  for (Size j = 0; j < experiment.getProteinIdentifications().size(); j++)
284  {
285  filterIdentificationsByThreshold(experiment.getProteinIdentifications()[j], protein_threshold_fraction, temp_protein_identification);
286  if (!temp_protein_identification.getHits().empty())
287  {
288  filtered_protein_identifications.push_back(temp_protein_identification);
289  }
290  }
291  experiment.setProteinIdentifications(filtered_protein_identifications);
292 
293  //filter peptide hits
294  PeptideIdentification temp_identification;
295  std::vector<PeptideIdentification> filtered_identifications;
296 
297  for (Size i = 0; i < experiment.size(); i++)
298  {
299  for (Size j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
300  {
301  filterIdentificationsByThreshold(experiment[i].getPeptideIdentifications()[j], peptide_threshold_fraction, temp_identification);
302  if (!temp_identification.getHits().empty())
303  {
304  filtered_identifications.push_back(temp_identification);
305  }
306  }
307  experiment[i].setPeptideIdentifications(filtered_identifications);
308  filtered_identifications.clear();
309  }
310  }
311 
313  template <class PeakT>
314  void filterIdentificationsByScores(MSExperiment<PeakT>& experiment, DoubleReal peptide_threshold_score, DoubleReal protein_threshold_score)
315  {
316  //filter protein hits
317  ProteinIdentification temp_protein_identification;
318  std::vector<ProteinIdentification> filtered_protein_identifications;
319 
320  for (Size j = 0; j < experiment.getProteinIdentifications().size(); j++)
321  {
322  filterIdentificationsByScore(experiment.getProteinIdentifications()[j], protein_threshold_score, temp_protein_identification);
323  if (!temp_protein_identification.getHits().empty())
324  {
325  filtered_protein_identifications.push_back(temp_protein_identification);
326  }
327  }
328  experiment.setProteinIdentifications(filtered_protein_identifications);
329 
330  //filter peptide hits
331  PeptideIdentification temp_identification;
332  std::vector<PeptideIdentification> filtered_identifications;
333 
334  for (Size i = 0; i < experiment.size(); i++)
335  {
336  for (Size j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
337  {
338  filterIdentificationsByScore(experiment[i].getPeptideIdentifications()[j], peptide_threshold_score, temp_identification);
339  if (!temp_identification.getHits().empty())
340  {
341  filtered_identifications.push_back(temp_identification);
342  }
343  }
344  experiment[i].setPeptideIdentifications(filtered_identifications);
345  filtered_identifications.clear();
346  }
347  }
348 
350  template <class PeakT>
352  {
353  //filter protein hits
354  ProteinIdentification temp_protein_identification;
355  std::vector<ProteinIdentification> filtered_protein_identifications;
356 
357  for (Size j = 0; j < experiment.getProteinIdentifications().size(); j++)
358  {
359  filterIdentificationsByBestNHits(experiment.getProteinIdentifications()[j], n, temp_protein_identification);
360  if (!temp_protein_identification.getHits().empty())
361  {
362  filtered_protein_identifications.push_back(temp_protein_identification);
363  }
364  }
365  experiment.setProteinIdentifications(filtered_protein_identifications);
366 
367  //filter peptide hits
368  PeptideIdentification temp_identification;
369  std::vector<PeptideIdentification> filtered_identifications;
370 
371  for (Size i = 0; i < experiment.size(); i++)
372  {
373  for (Size j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
374  {
375  filterIdentificationsByBestNHits(experiment[i].getPeptideIdentifications()[j], n, temp_identification);
376  if (!temp_identification.getHits().empty())
377  {
378  filtered_identifications.push_back(temp_identification);
379  }
380  }
381  experiment[i].setPeptideIdentifications(filtered_identifications);
382  filtered_identifications.clear();
383  }
384  }
385 
387  template <class PeakT>
389  const std::vector<FASTAFile::FASTAEntry>& proteins)
390  {
391  std::vector<PeptideIdentification> temp_identifications;
392  std::vector<PeptideIdentification> filtered_identifications;
393  PeptideIdentification temp_identification;
394 
395  for (Size i = 0; i < experiment.size(); i++)
396  {
397  if (experiment[i].getMSLevel() == 2)
398  {
399  temp_identifications = experiment[i].getPeptideIdentifications();
400  for (Size j = 0; j < temp_identifications.size(); j++)
401  {
402  filterIdentificationsByProteins(temp_identifications[j], proteins, temp_identification);
403  if (!temp_identification.getHits().empty())
404  {
405  filtered_identifications.push_back(temp_identification);
406  }
407  }
408  experiment[i].setPeptideIdentifications(filtered_identifications);
409  filtered_identifications.clear();
410  }
411  }
412  }
413 
414  };
415 
416 } // namespace OpenMS
417 
418 #endif // OPENMS_FILTERING_ID_IDFILTER_H
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein ProteinIdentification vector
Size size() const
Definition: MSExperiment.h:117
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
void filterIdentificationsByThreshold(const IdentificationType &identification, DoubleReal threshold_fraction, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification by only allowing peptides/proteins which reach a score of at least threshold_fraction * significance threshold
Definition: IDFilter.h:73
void filterIdentificationsByBestNHits(MSExperiment< PeakT > &experiment, Size n)
filters an MS/MS experiment corresponding to the best n hits for every spectrum
Definition: IDFilter.h:351
void filterIdentificationsByScores(MSExperiment< PeakT > &experiment, DoubleReal peptide_threshold_score, DoubleReal protein_threshold_score)
filters an MS/MS experiment corresponding to the threshold scores
Definition: IDFilter.h:314
void filterIdentificationsByBestNHits(const IdentificationType &identification, Size n, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification, keeping only the n best hits according to the score.
Definition: IDFilter.h:150
void filterIdentificationsByScore(const IdentificationType &identification, DoubleReal threshold_score, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification corresponding to the threshold_score ...
Definition: IDFilter.h:107
const std::vector< PeptideHit > & getHits() const
returns the peptide hits
void filterIdentificationsByBestNToMHits(const IdentificationType &identification, Size n, Size m, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification, keeping only the n-th to m-th best hits according to the score.
Definition: IDFilter.h:188
Representation of a mass spectrometry experiment.
Definition: MSExperiment.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
void filterIdentificationsByThresholds(MSExperiment< PeakT > &experiment, DoubleReal peptide_threshold_fraction, DoubleReal protein_threshold_fraction)
filters an MS/MS experiment corresponding to the threshold_fractions
Definition: IDFilter.h:277
const std::vector< ProteinHit > & getHits() const
Returns the protein hits.
used to filter identifications by different criteria
Definition: IDFilter.h:61
int Int
Signed integer type.
Definition: Types.h:100
void filterIdentificationsByProteins(MSExperiment< PeakT > &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
filters an MS/MS experiment corresponding to the given proteins
Definition: IDFilter.h:388
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:15 using doxygen 1.8.5