Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
ProteinResolver.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: David Wojnar $
32 // $Authors: David Wojnar $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_QUANTITATION_PROTEINRESOLVER_H
36 #define OPENMS_ANALYSIS_QUANTITATION_PROTEINRESOLVER_H
37 
45 
46 
47 namespace OpenMS
48 {
59  class OPENMS_DLLAPI ProteinResolver :
60  public DefaultParamHandler
61  {
62 
63 public:
64 
65  //default constructor
67 
68  //copy constructor
69  ProteinResolver(const ProteinResolver & rhs);
70 
71  //assignment operator
72  ProteinResolver & operator=(const ProteinResolver & rhs);
73 
74  //destructor
75  virtual ~ProteinResolver();
76 
77 
78  struct ProteinEntry;
79  struct PeptideEntry;
80  struct ISDGroup;
81  struct MSDGroup;
82  struct ResolverResult;
83 
84  //represents a protein from fasta file
85  struct ProteinEntry
86  {
87  std::list<PeptideEntry *> peptides;
88  bool traversed;
90  enum type {primary, secondary, primary_indistinguishable, secondary_indistinguishable} protein_type;
91  DoubleReal weight; //monoisotopic
92  Real coverage; //in percent
93  //if the protein is indistinguishable, all of its fellow proteins are in the list indis
94  std::list<ProteinEntry *> indis;
96  Size msd_group; //index
97  Size isd_group; //index
99  };
100 
101  //represents a peptide. First in silico. If experimental is set to true it is MS/MS derived.
103  {
104  std::list<ProteinEntry *> proteins;
105  bool traversed;
110  Size msd_group; //index
111  Size isd_group; //index
115  };
116 
117  //representation of an msd group. contains peptides, proteins and a pointer to its ISD group
118  struct MSDGroup
119  {
120  std::list<ProteinEntry *> proteins;
121  std::list<PeptideEntry *> peptides;
127  Real intensity; // intensity of the MSD Group. Defined as the median of the peptide intensities.
128  };
129 
130  struct ISDGroup
131  {
132  std::list<ProteinEntry *> proteins;
133  std::list<PeptideEntry *> peptides;
135  std::list<Size> msd_groups;
136  };
137 
139  {
141  std::vector<ISDGroup> * isds;
142  std::vector<MSDGroup> * msds;
143  std::vector<ProteinEntry> * protein_entries;
144  std::vector<PeptideEntry> * peptide_entries;
145  std::vector<Size> * reindexed_peptides;
146  std::vector<Size> * reindexed_proteins;
147  enum type {PeptideIdent, Consensus} input_type;
148  std::vector<PeptideIdentification> * peptide_identification;
150  };
151 
159  void resolveConsensus(ConsensusMap & consensus);
160 
168  void resolveID(std::vector<PeptideIdentification> & peptide_identifications);
169 
180  // void writeProteinsAndPeptidesmzTab(std::vector<ProteinEntry>& protein_nodes, std::vector<PeptideEntry>& peptide_nodes, std::vector<Size>& reindexed_proteins, std::vector<Size>& reindexed_peptides, std::vector<PeptideIdentification>& peptide_identifications, String& output );
189  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, std::vector<PeptideIdentification> & identifications, String & output_file); // not implemented
198  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, ConsensusMap & consensus, String & output_file); // not implemented
206  // void writeProteinTable(std::vector<ProteinEntry> & proteins, std::vector<Size> & reindexed_proteins, String & output_file); // not implemented
214  // void writeProteinGroups(std::vector<ISDGroup> & isd_groups, std::vector<MSDGroup> & msd_groups, String & output_file); // not implemented
215 
222  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, ConsensusMap & consensus);
223 
231  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, std::vector<PeptideIdentification> & peptide_nodes);
232 
233  void clearResult();
234 
235  void setProteinData(std::vector<FASTAFile::FASTAEntry> & protein_data);
236 
237  const std::vector<ResolverResult> & getResults();
238 
239  //overloaded functions -- return a const reference to a PeptideIdentification object or a peptideHit either from a consensusMap or a vector<PeptideIdentification>
240  static const PeptideIdentification & getPeptideIdentification(const ConsensusMap & consensus, const PeptideEntry * peptide);
241  static const PeptideHit & getPeptideHit(const ConsensusMap & consensus, const PeptideEntry * peptide);
242  static const PeptideIdentification & getPeptideIdentification(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
243  static const PeptideHit & getPeptideHit(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
244 
245 private:
246 
247  std::vector<ResolverResult> resolver_result_;
248  std::vector<FASTAFile::FASTAEntry> protein_data_;
249 
250  void computeIntensityOfMSD_(std::vector<MSDGroup> & msd_groups);
251 
252  //traverses protein and peptide nodes: once for building ISD groups and once for building MSD groups
253  void traversProtein_(ProteinEntry * prot_node, ISDGroup & group);
254  void traversProtein_(ProteinEntry * prot_node, MSDGroup & group);
255  void traversPeptide_(PeptideEntry * pep_node, ISDGroup & group);
256  void traversPeptide_(PeptideEntry * pep_node, MSDGroup & group);
257  //searches given sequence in all nodes and returns its index or nodes.size() if not found.
258  Size findPeptideEntry_(String seq, std::vector<PeptideEntry> & nodes);
259  //helper function for findPeptideEntry.
260  Size binarySearchNodes_(String & seq, std::vector<PeptideEntry> & nodes, Size start, Size end);
261  //includes all MSMS derived peptides into the graph --idXML
262  Size includeMSMSPeptides_(std::vector<PeptideIdentification> & peptide_identifications, std::vector<PeptideEntry> & peptide_nodes);
263  //TODO include run information for each peptide
264  //includes all MSMS derived peptides into the graph --consensusXML
265  Size includeMSMSPeptides_(ConsensusMap & consensus, std::vector<PeptideEntry> & peptide_nodes);
266  //Proteins and Peptides get reindexed, based on whether they belong to msd groups or not. Indexes of Proteins which are in an ISD group but in none of the MSD groups will not be used anymore.
267  void reindexingNodes_(std::vector<MSDGroup> & msd_groups, std::vector<Size> & reindexed_proteins, std::vector<Size> & reindexed_peptides);
268  //marks Proteins which have a unique peptide as primary. Uses reindexed vector, thus reindexingNodes has to be called before.
269  void primaryProteins_(std::vector<PeptideEntry> & peptide_nodes, std::vector<Size> & reindexed_peptides);
270  void buildingMSDGroups_(std::vector<MSDGroup> & msd_groups, std::vector<ISDGroup> & isd_groups);
271  void buildingISDGroups_(std::vector<ProteinEntry> & protein_nodes, std::vector<PeptideEntry> & peptide_nodes,
272  std::vector<ISDGroup> & isd_groups);
273  //not tested
274  //ProteinResolver::indistinguishableProteins(vector<MSDGroup>& msd_groups);
275 
276  }; // class
277 
278 } // namespace
279 
280 #endif // OPENMS_ANALYSIS_QUANTITATION_PROTEINRESOLVER_H
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:132
Definition: ProteinResolver.h:130
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:120
Size index
Definition: ProteinResolver.h:109
float Real
Real type.
Definition: Types.h:109
std::vector< Size > * reindexed_proteins
Definition: ProteinResolver.h:146
Size msd_group
Definition: ProteinResolver.h:110
A more convenient string class.
Definition: String.h:56
ISDGroup * isd_group
Definition: ProteinResolver.h:123
Size index
Definition: ProteinResolver.h:134
std::vector< PeptideIdentification > * peptide_identification
Definition: ProteinResolver.h:148
Size peptide_hit
Definition: ProteinResolver.h:108
std::vector< PeptideEntry > * peptide_entries
Definition: ProteinResolver.h:144
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:87
Size isd_group
Definition: ProteinResolver.h:97
Real intensity
Definition: ProteinResolver.h:127
String identifier
Definition: ProteinResolver.h:140
Definition: ProteinResolver.h:118
std::vector< FASTAFile::FASTAEntry > protein_data_
Definition: ProteinResolver.h:248
Size number_of_decoy
Definition: ProteinResolver.h:124
Size index
Definition: ProteinResolver.h:95
A container for consensus elements.
Definition: ConsensusMap.h:60
Size peptide_identification
Definition: ProteinResolver.h:107
std::vector< Size > * reindexed_peptides
Definition: ProteinResolver.h:145
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:121
std::list< ProteinEntry * > indis
Definition: ProteinResolver.h:94
Size index
Definition: ProteinResolver.h:122
Size number_of_target_plus_decoy
Definition: ProteinResolver.h:126
bool traversed
Definition: ProteinResolver.h:105
Helper class for peptide and protein quantification based on feature data annotated with IDs...
Definition: ProteinResolver.h:59
Size isd_group
Definition: ProteinResolver.h:111
Size number_of_experimental_peptides
Definition: ProteinResolver.h:98
std::vector< ProteinEntry > * protein_entries
Definition: ProteinResolver.h:143
type
Definition: ProteinResolver.h:147
String origin
Definition: ProteinResolver.h:114
Size msd_group
Definition: ProteinResolver.h:96
Real coverage
Definition: ProteinResolver.h:92
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:133
std::list< Size > msd_groups
Definition: ProteinResolver.h:135
Representation of a peptide hit.
Definition: PeptideHit.h:54
Definition: ProteinResolver.h:85
bool experimental
Definition: ProteinResolver.h:112
FASTAFile::FASTAEntry * fasta_entry
Definition: ProteinResolver.h:89
Definition: ProteinResolver.h:138
ConsensusMap * consensus_map
Definition: ProteinResolver.h:149
String sequence
Definition: ProteinResolver.h:106
DoubleReal weight
Definition: ProteinResolver.h:91
Size number_of_target
Definition: ProteinResolver.h:125
std::vector< ResolverResult > resolver_result_
Definition: ProteinResolver.h:247
std::vector< MSDGroup > * msds
Definition: ProteinResolver.h:142
bool traversed
Definition: ProteinResolver.h:88
std::vector< ISDGroup > * isds
Definition: ProteinResolver.h:141
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
Definition: ProteinResolver.h:102
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:61
Real intensity
Definition: ProteinResolver.h:113
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:90
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:104
type
Definition: ProteinResolver.h:90
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:20 using doxygen 1.8.5