Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzTabFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_MZTABFILE_H
36 #define OPENMS_FORMAT_MZTABFILE_H
37 
38 #include <OpenMS/FORMAT/MzTab.h>
44 
45 #include <boost/math/special_functions/fpclassify.hpp>
46 
47 #include <vector>
48 #include <algorithm>
49 
50 namespace OpenMS
51 {
// File adapter for MzTab files.
// Declares two store() overloads that take a target filename plus the data to
// write, and a load() that takes a filename and a non-const MzTab reference.
// Only declarations are visible in this header; the actual behavior lives in
// the implementation file.
57  class OPENMS_DLLAPI MzTabFile
58  {
59 public:
     // Default constructor.
61  MzTabFile();
     // Destructor (non-virtual).
63  ~MzTabFile();
64 
     // Maps a pair of Strings to a list of PeptideHits.
     // NOTE(review): judging by the parameter names used below
     // (map_run_accession_to_pephits / map_run_accesion_to_peptides), the key is
     // presumably (run identifier, protein accession) -- confirm in the implementation.
65  typedef std::map<std::pair<String, String>, std::vector<PeptideHit> > MapAccPepType;
66 
67  // (deprecated) TODO: use conversion to MzTab data structure and store function below
     // Takes protein and peptide identifications directly instead of an MzTab object.
     // NOTE(review): the meaning of 'in' and 'document_id' is not visible from this
     // header -- confirm in the implementation. Both are passed by value (copied).
68  void store(const String & filename, const std::vector<ProteinIdentification> & protein_ids, const std::vector<PeptideIdentification> & peptide_ids, String in, String document_id) const;
69 
70  // store MzTab file
71  void store(const String & filename, const MzTab& mz_tab) const;
72 
73  // load MzTab file
     // Unlike the store() overloads this member is not const; mz_tab is taken by
     // non-const reference (presumably the output -- confirm).
74  void load(const String & filename, MzTab& mz_tab);
75 
76 protected:
     // Helpers for the individual mzTab sections (meta data, protein, peptide,
     // small molecule). The *Section_ functions take the section data and a
     // StringList by non-const reference (presumably lines are appended to it --
     // confirm); the *Header_ and *SectionRow_ functions return a String.
77  void generateMzTabMetaDataSection_(const MzTabMetaData& map, StringList& sl) const;
78 
79  String generateMzTabProteinHeader_(Int n_subsamples, const std::vector<String>& optional_protein_columns) const;
80 
81  String generateMzTabProteinSectionRow_(const MzTabProteinSectionRow& row, const String& unit_id) const;
82 
83  void generateMzTabProteinSection_(const MzTabProteinSectionData& map, StringList& sl) const;
84 
85  void generateMzTabPeptideSection_(const MzTabPeptideSectionData& map, StringList& sl) const;
86 
87  void generateMzTabSmallMoleculeSection_(const MzTabSmallMoleculeSectionData & map, StringList& sl) const;
88 
     // NOTE(review): the second parameter is named optional_protein_columns although
     // this is the peptide header -- looks like a copy/paste of the protein variant;
     // the name is only cosmetic in a declaration.
89  String generateMzTabPeptideHeader_(Int n_subsamples, const std::vector<String>& optional_protein_columns) const;
90 
91  String generateMzTabPeptideSectionRow_(const MzTabPeptideSectionRow& row, const String& unit_id) const;
92 
93  String generateMzTabSmallMoleculeHeader_(Int n_subsamples, const std::vector<String>& optional_smallmolecule_columns) const;
94 
95  String generateMzTabSmallMoleculeSectionRow_(const MzTabSmallMoleculeSectionRow& row, const String& unit_id) const;
96 
97  // auxiliary functions
     // All helpers below are static, i.e. they do not depend on instance state.
     // Their signatures (including by-value String/map parameters, which are
     // copied on each call) must stay in sync with the out-of-view definitions.
98 
     // Operates on the iterator range [begin, end) of PeptideIdentifications.
99  static void sortPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
100 
     // Operates on the iterator range [begin, end) of PeptideIdentifications.
101  static void keepFirstPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
102 
     // Takes the identifications as const input and two String-keyed maps by
     // non-const reference (presumably filled per run -- confirm).
104  static void partitionIntoRuns_(const std::vector<PeptideIdentification> & pep_ids,
105  const std::vector<ProteinIdentification> & pro_ids,
106  std::map<String, std::vector<PeptideIdentification> > & map_run_to_pepids,
107  std::map<String, std::vector<ProteinIdentification> > & map_run_to_proids
108  );
109 
110 
     // Takes the per-run peptide identifications as const input and a
     // MapAccPepType by non-const reference (presumably the output -- confirm).
112  static void createProteinToPeptideLinks_(const std::map<String, std::vector<PeptideIdentification> > & map_run_to_pepids, MapAccPepType & map_run_accession_to_pephits);
113 
115  static String extractProteinAccession_(const PeptideHit & peptide_hit);
116 
118  static String extractPeptideModifications_(const PeptideHit & peptide_hit);
119 
121  static String mapSearchEngineToCvParam_(const String & openms_search_engine_name);
122 
123  static String mapSearchEngineScoreToCvParam_(const String & openms_search_engine_name, DoubleReal score, String score_type);
124 
     // Note: the count is returned as a String, not as a number.
125  static String extractNumPeptides_(const String & common_identifier, const String & protein_accession,
126  const MapAccPepType & map_run_accesion_to_peptides);
127 
128  // mzTab definition of distinct
129  static String extractNumPeptidesDistinct_(String common_identifier, String protein_accession,
130  const MapAccPepType & map_run_accesion_to_peptides);
131 
132  // same as distinct but additional constraint of uniqueness (=maps to exactly one Protein)
133  static String extractNumPeptidesUnambiguous_(String common_identifier, String protein_accession,
134  const MapAccPepType & map_run_accesion_to_peptides);
135 
     // Returns a String-keyed map of counts (presumably run -> number of
     // sub-samples, matching the map_run_to_num_sub parameter below -- confirm).
136  static std::map<String, Size> extractNumberOfSubSamples_(const std::map<String, std::vector<ProteinIdentification> > & map_run_to_proids);
137 
     // The two header writers take the output stream by reference and the
     // sub-sample map by value.
138  static void writePeptideHeader_(SVOutStream & output, std::map<String, Size> n_sub_samples);
139 
140  static void writeProteinHeader_(SVOutStream & output, std::map<String, Size> n_sub_samples);
141 
     // Takes the output stream, one ProteinIdentification and the lookup maps
     // declared above. NOTE(review): semantics of run_count and has_coverage are
     // not visible from this header -- confirm in the implementation.
142  static void writeProteinData_(SVOutStream & output,
143  const ProteinIdentification & prot_id,
144  Size run_count,
145  String input_filename,
146  bool has_coverage,
147  const MapAccPepType & map_run_accesion_to_peptides,
148  const std::map<String, Size> & map_run_to_num_sub
149  );
150 
151  };
152 
153 } // namespace OpenMS
154 
155 #endif // OPENMS_FORMAT_MZTABFILE_H
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
std::map< String, MzTabProteinSectionRows > MzTabProteinSectionData
Definition: MzTab.h:1343
A more convenient string class.
Definition: String.h:56
Definition: MzTab.h:1251
std::map< String, MzTabUnitIdMetaData > MzTabMetaData
Definition: MzTab.h:1341
std::map< String, MzTabSmallMoleculeSectionRows > MzTabSmallMoleculeSectionData
Definition: MzTab.h:1347
Representation of a peptide hit.
Definition: PeptideHit.h:54
File adapter for MzTab files.
Definition: MzTabFile.h:57
std::map< String, MzTabPeptideSectionRows > MzTabPeptideSectionData
Definition: MzTab.h:1345
String list.
Definition: StringList.h:56
Definition: MzTab.h:1285
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:51
Definition: MzTab.h:1309
int Int
Signed integer type.
Definition: Types.h:100
std::map< std::pair< String, String >, std::vector< PeptideHit > > MapAccPepType
Definition: MzTabFile.h:65
Data model of MzTab files. Please see the official MzTab specification at https://code.google.com/p/mztab/.
Definition: MzTab.h:1355

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:18 using doxygen 1.8.5