Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
CachedmzML.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_CACHEDMZML_H
36 #define OPENMS_ANALYSIS_OPENSWATH_CACHEDMZML_H
37 
39 
40 #include <OpenMS/CONCEPT/Types.h>
42 
44 #include <OpenMS/FORMAT/MzMLFile.h>
45 
46 #include <fstream>
47 
48 #define MAGIC_NUMBER 8093
49 
50 namespace OpenMS
51 {
60  class OPENMS_DLLAPI CachedmzML :
61  public ProgressLogger
62  {
64  double dbl_field_;
65 
66 public:
67 
// DatumSingleton is the element type of Datavector, the buffers that the
// readSpectrum_/readChromatogram_ helpers fill from disk and that
// writeSpectrum_/writeChromatogram_ dump to disk, so it determines how many
// bytes each stored value occupies in the cache file.
// NOTE(review): readSpectrumFast/readChromatogramFast size their reads with
// sizeof(double) rather than sizeof(DatumSingleton); switching to the float
// branch would presumably break them -- confirm before flipping the #if.
71 #if 1
72  // double means twice the file size
73  typedef double DatumSingleton;
74 #else
75  // float means half the file size
76  typedef float DatumSingleton;
77 #endif
 // Contiguous buffer holding one array (e.g. all m/z values) of a record.
78  typedef std::vector<DatumSingleton> Datavector;
79 
83  CachedmzML()
 // NOTE(review): this listing skips a line between the declarator and the
 // body (presumably a member-initializer list) -- confirm against the real
 // header. The body itself contains no statements.
85  {
86  }
87 
90  {
 // Empty body: no explicit cleanup is performed here.
91  }
92 
95  {
 // Self-assignment guard: nothing to copy when rhs is this object.
96  if (&rhs == this)
97  return *this;
98 
 // Only the two offset indices are copied.
 // NOTE(review): dbl_field_/int_field_ and any base-class state are not
 // assigned here; the former look like scratch fields overwritten by
 // writeSpectrum_ before use -- confirm this omission is intentional.
99  spectra_index_ = rhs.spectra_index_;
100  chrom_index_ = rhs.chrom_index_;
101 
102  return *this;
103  }
104 
106 
110  void writeMemdump(MapType& exp, String out)
112  {
113  std::ofstream ofs(out.c_str(), std::ios::binary);
114  Size exp_size = exp.size();
115  Size chrom_size = exp.getChromatograms().size();
116  int magic_number = MAGIC_NUMBER;
117  ofs.write((char*)&magic_number, sizeof(magic_number));
118  ofs.write((char*)&exp_size, sizeof(exp_size));
119  ofs.write((char*)&chrom_size, sizeof(chrom_size));
120 
121  startProgress(0, exp.size() + exp.getChromatograms().size(), "storing binary spectra");
122  for (Size i = 0; i < exp.size(); i++)
123  {
124  setProgress(i);
125  writeSpectrum_(exp[i], ofs);
126  }
127 
128  for (Size i = 0; i < exp.getChromatograms().size(); i++)
129  {
130  setProgress(i);
131  writeChromatogram_(exp.getChromatograms()[i], ofs);
132  }
133 
134  ofs.close();
135  endProgress();
136  }
137 
139  void readMemdump(MapType& exp_reading, String filename) const
140  {
141  std::ifstream ifs(filename.c_str(), std::ios::binary);
142  Size exp_size, chrom_size;
143  Peak1D current_peak;
144 
145  int magic_number;
146  ifs.read((char*)&magic_number, sizeof(magic_number));
147  if (magic_number != MAGIC_NUMBER)
148  {
149  throw "wrong file, does not start with MAGIC_NUMBER";
150  }
151 
152  ifs.read((char*)&exp_size, sizeof(exp_size));
153  ifs.read((char*)&chrom_size, sizeof(chrom_size));
154 
155  exp_reading.reserve(exp_size);
156  startProgress(0, exp_size + chrom_size, "reading binary spectra");
157  for (Size i = 0; i < exp_size; i++)
158  {
159  setProgress(i);
160  SpectrumType spectrum;
161  readSpectrum_(spectrum, ifs);
162  exp_reading.addSpectrum(spectrum);
163  }
164  std::vector<ChromatogramType> chromatograms;
165  for (Size i = 0; i < chrom_size; i++)
166  {
167  setProgress(i);
168  ChromatogramType chromatogram;
169  readChromatogram_(chromatogram, ifs);
170  chromatograms.push_back(chromatogram);
171  }
172  exp_reading.setChromatograms(chromatograms);
173 
174  ifs.close();
175  endProgress();
176  }
177 
179 
183  void readSingleSpectrum(MSSpectrum<Peak1D>& spectrum, const String& filename, const Size& idx) const
185  {
186  // open stream, read
187  std::ifstream ifs(filename.c_str(), std::ios::binary);
188  readSingleSpectrum(spectrum, ifs, idx);
189  }
190 
191  // Read a single spectrum from the given filestream
192  void readSingleSpectrum(MSSpectrum<Peak1D>& spectrum, std::ifstream& ifs, const Size& idx) const
193  {
194  // go to the specified index
195  ifs.seekg(idx);
196  readSpectrum_(spectrum, ifs);
197  }
198 
200 
204  const std::vector<Size>& getSpectraIndex() const
205  {
206  return spectra_index_;
207  }
208 
209  const std::vector<Size>& getChromatogramIndex() const
210  {
211  return chrom_index_;
212  }
213 
215 
217  void createMemdumpIndex(String filename)
218  {
219  std::ifstream ifs(filename.c_str(), std::ios::binary);
220  Size exp_size, chrom_size;
221  Peak1D current_peak;
222 
223  spectra_index_.clear();
224  chrom_index_.clear();
225  int magic_number;
226  int extra_offset = sizeof(dbl_field_) + sizeof(int_field_);
227  int chrom_offset = 0;
228 
229  ifs.read((char*)&magic_number, sizeof(magic_number));
230  if (magic_number != MAGIC_NUMBER)
231  {
232  throw "wrong file, does not start with MAGIC_NUMBER";
233  }
234 
235  // For spectra and chromatograms go through file, read the size of the
236  // spectrum/chromatogram and record the starting index of the element, then
237  // skip ahead to the next spectrum/chromatogram.
238  ifs.read((char*)&exp_size, sizeof(exp_size));
239  ifs.read((char*)&chrom_size, sizeof(chrom_size));
240  startProgress(0, exp_size + chrom_size, "Creating index for binary spectra");
241  for (Size i = 0; i < exp_size; i++)
242  {
243  setProgress(i);
244 
245  Size spec_size;
246  spectra_index_.push_back(ifs.tellg());
247  ifs.read((char*)&spec_size, sizeof(spec_size));
248  ifs.seekg((int)ifs.tellg() + extra_offset + (sizeof(DatumSingleton)) * 2 * (spec_size));
249 
250  }
251 
252  for (Size i = 0; i < chrom_size; i++)
253  {
254  setProgress(i);
255 
256  Size chrom_size;
257  chrom_index_.push_back(ifs.tellg());
258  ifs.read((char*)&chrom_size, sizeof(chrom_size));
259  ifs.seekg((int)ifs.tellg() + chrom_offset + (sizeof(DatumSingleton)) * 2 * (chrom_size));
260 
261  }
262 
263  ifs.close();
264  endProgress();
265  }
266 
268  void writeMetadata(MapType exp, String out_meta)
269  {
270  // delete the actual data for all spectra and chromatograms, leave only metadata
271  std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp.getChromatograms(); // copy
272  for (Size i = 0; i < exp.size(); i++)
273  {
274  exp[i].clear(false);
275  }
276  for (Size i = 0; i < exp.getChromatograms().size(); i++)
277  {
278  // delete the actual data, leave only metadata
279  //exp.getChromatograms()[i].clear(false);
280  chromatograms[i].clear(false);
281  }
282  exp.setChromatograms(chromatograms);
283 
284  // store the meta data that is left in out_meta file
285  MzMLFile f;
286  f.store(out_meta, exp);
287  }
288 
291  OpenSwath::BinaryDataArrayPtr data2, std::ifstream& ifs, int ms_level,
292  double rt)
293  {
 // Reads one spectrum record at the current stream position straight into
 // the data vectors of data1 and data2 (first and second array of the
 // record; writeSpectrum_ stores m/z first, then intensity).
 // NOTE(review): ms_level and rt are by-value parameters here, so the values
 // read into them below never reach the caller -- confirm whether they were
 // meant to be references.
 // -1 converts to the largest Size value; presumably this makes the resize
 // below fail loudly if the count cannot be read -- confirm.
294  Size spec_size = -1;
295  ifs.read((char*) &spec_size, sizeof(spec_size));
296  ifs.read((char*) &ms_level, sizeof(ms_level));
297  ifs.read((char*) &rt, sizeof(rt));
298 
299  data1->data.resize(spec_size);
300  data2->data.resize(spec_size);
 // NOTE(review): the element width is hard-coded as sizeof(double) instead
 // of sizeof(DatumSingleton); the two only agree while DatumSingleton is
 // double. Also, [0] is applied even when spec_size is 0.
301  ifs.read((char*) &(data1->data)[0], spec_size * sizeof(double));
302  ifs.read((char*) &(data2->data)[0], spec_size * sizeof(double));
303  }
304 
307  OpenSwath::BinaryDataArrayPtr data2, std::ifstream& ifs)
308  {
 // Reads one chromatogram record at the current stream position straight
 // into the data vectors of data1 and data2 (first and second array of the
 // record; writeChromatogram_ stores RT first, then intensity).
 // -1 converts to the largest Size value; presumably this makes the resize
 // below fail loudly if the count cannot be read -- confirm.
309  Size spec_size = -1;
310  ifs.read((char*) &spec_size, sizeof(spec_size));
311  data1->data.resize(spec_size);
312 
313  data2->data.resize(spec_size);
 // NOTE(review): the element width is hard-coded as sizeof(double) instead
 // of sizeof(DatumSingleton); the two only agree while DatumSingleton is
 // double. Also, [0] is applied even when spec_size is 0.
314  ifs.read((char*) &(data1->data)[0], spec_size * sizeof(double));
315  ifs.read((char*) &(data2->data)[0], spec_size * sizeof(double));
316  }
317 
318 protected:
319 
320  // read a single spectrum directly into a datavector (assuming file is already at the correct position)
321  void readSpectrum_(Datavector& data1, Datavector& data2, std::ifstream& ifs, int& ms_level, double& rt) const
322  {
323  Size spec_size = -1;
324  ifs.read((char*)&spec_size, sizeof(spec_size));
325  ifs.read((char*)&ms_level, sizeof(ms_level));
326  ifs.read((char*)&rt, sizeof(rt));
327 
328  data1.resize(spec_size);
329  data2.resize(spec_size);
330  ifs.read((char*)&data1[0], spec_size * sizeof(DatumSingleton));
331  ifs.read((char*)&data2[0], spec_size * sizeof(DatumSingleton));
332  }
333 
334  // read a single chromatogram directly into a datavector (assuming file is already at the correct position)
335  void readChromatogram_(Datavector& data1, Datavector& data2, std::ifstream& ifs) const
336  {
337  Size spec_size = -1;
338  ifs.read((char*)&spec_size, sizeof(spec_size));
339  data1.resize(spec_size);
340  data2.resize(spec_size);
341  ifs.read((char*)&data1[0], spec_size * sizeof(DatumSingleton));
342  ifs.read((char*)&data2[0], spec_size * sizeof(DatumSingleton));
343  }
344 
345  // read a single spectrum directly into an OpenMS MSSpectrum (assuming file is already at the correct position)
346  void readSpectrum_(SpectrumType& spectrum, std::ifstream& ifs) const
347  {
348  Datavector mz_data;
349  Datavector int_data;
350 
351  int ms_level;
352  double rt;
353  readSpectrum_(mz_data, int_data, ifs, ms_level, rt);
354  spectrum.reserve(mz_data.size());
355  spectrum.setMSLevel(ms_level);
356  spectrum.setRT(rt);
357 
358  for (Size j = 0; j < mz_data.size(); j++)
359  {
360  Peak1D p;
361  p.setMZ(mz_data[j]);
362  p.setIntensity(int_data[j]);
363  spectrum.push_back(p);
364  }
365 
366  }
367 
368  // read a single chromatogram directly into an OpenMS MSChromatograms (assuming file is already at the correct position)
 // Decodes the record through the Datavector overload and appends one point
 // per (RT, intensity) pair to the chromatogram passed in.
369  void readChromatogram_(ChromatogramType& chromatogram, std::ifstream& ifs) const
370  {
371  Datavector rt_data;
372  Datavector int_data;
373  readChromatogram_(rt_data, int_data, ifs);
 // Reserve up front so the push_back calls below need not reallocate.
374  chromatogram.reserve(rt_data.size());
375 
376  for (Size j = 0; j < rt_data.size(); j++)
377  {
 // NOTE(review): the declaration of p (listing line 378) is missing from
 // this listing; presumably a ChromatogramPeak -- confirm against the header.
379  p.setRT(rt_data[j]);
380  p.setIntensity(int_data[j]);
381  chromatogram.push_back(p);
382  }
383 
384  }
385 
386  // write a single spectrum to filestream
387  void writeSpectrum_(const SpectrumType& spectrum, std::ofstream& ofs)
388  {
389  Size exp_size = spectrum.size();
390  ofs.write((char*)&exp_size, sizeof(exp_size));
391  int_field_ = spectrum.getMSLevel();
392  ofs.write((char*)&int_field_, sizeof(int_field_));
393  dbl_field_ = spectrum.getRT();
394  ofs.write((char*)&dbl_field_, sizeof(dbl_field_));
395 
396 #if 0
397  ofs.write((char*)&exp[i].front(), exp[i].size() * sizeof(exp[i].front()));
398  std::cout << " storing spectrum " << i << " with size " << exp[i].size() << std::endl;
399 #else
400  Datavector mz_data;
401  Datavector int_data;
402  for (Size j = 0; j < spectrum.size(); j++)
403  {
404  mz_data.push_back(spectrum[j].getMZ());
405  int_data.push_back(spectrum[j].getIntensity());
406  }
407  ofs.write((char*)&mz_data.front(), mz_data.size() * sizeof(mz_data.front()));
408  ofs.write((char*)&int_data.front(), int_data.size() * sizeof(int_data.front()));
409 #endif
410  //std::cout << exp[i] << std::endl;
411  }
412 
413  // write a single chromatogram to filestream
414  void writeChromatogram_(const ChromatogramType& chromatogram, std::ofstream& ofs)
415  {
416  Size exp_size = chromatogram.size();
417  ofs.write((char*)&exp_size, sizeof(exp_size));
418  Datavector rt_data;
419  Datavector int_data;
420  for (Size j = 0; j < chromatogram.size(); j++)
421  {
422  rt_data.push_back(chromatogram[j].getRT());
423  int_data.push_back(chromatogram[j].getIntensity());
424  }
425  ofs.write((char*)&rt_data.front(), rt_data.size() * sizeof(rt_data.front()));
426  ofs.write((char*)&int_data.front(), int_data.size() * sizeof(int_data.front()));
427  }
428 
// Stream position at which each spectrum record starts in the indexed dump;
// cleared and refilled by createMemdumpIndex.
429  std::vector<Size> spectra_index_;
 // Stream position at which each chromatogram record starts in the indexed
 // dump; cleared and refilled by createMemdumpIndex.
430  std::vector<Size> chrom_index_;
431 
432  };
433 }
434 #endif
void setRT(CoordinateType rt)
Mutable access to RT.
Definition: ChromatogramPeak.h:117
MSSpectrum< Peak1D > SpectrumType
Definition: CachedmzML.h:69
A more convenient string class.
Definition: String.h:56
void createMemdumpIndex(String filename)
Create an index on the location of all the spectra and chromatograms.
Definition: CachedmzML.h:217
UInt getMSLevel() const
Returns the MS level.
Definition: MSSpectrum.h:231
Size size() const
Definition: MSExperiment.h:117
std::vector< Size > spectra_index_
Definition: CachedmzML.h:429
The representation of a chromatogram.
Definition: MSChromatogram.h:53
void writeChromatogram_(const ChromatogramType &chromatogram, std::ofstream &ofs)
Definition: CachedmzML.h:414
double DatumSingleton
Definition: CachedmzML.h:73
void readSingleSpectrum(MSSpectrum< Peak1D > &spectrum, std::ifstream &ifs, const Size &idx) const
Definition: CachedmzML.h:192
MSChromatogram< ChromatogramPeak > ChromatogramType
Definition: CachedmzML.h:70
MSExperiment< Peak1D > MapType
Definition: CachedmzML.h:68
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:114
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:105
File adapter for MzML files.
Definition: MzMLFile.h:58
void readChromatogram_(ChromatogramType &chromatogram, std::ifstream &ifs) const
Definition: CachedmzML.h:369
void readMemdump(MapType &exp_reading, String filename) const
Read all spectra from a dump from the disk.
Definition: CachedmzML.h:139
void reserve(Size s)
Definition: MSExperiment.h:132
const std::vector< Size > & getSpectraIndex() const
Definition: CachedmzML.h:204
CachedmzML & operator=(const CachedmzML &rhs)
Assignment operator.
Definition: CachedmzML.h:94
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: MSSpectrum.h:237
void store(const String &filename, const MapType &map) const
Stores a map in a MzML file.
Definition: MzMLFile.h:126
const std::vector< Size > & getChromatogramIndex() const
Definition: CachedmzML.h:209
OPENSWATHALGO_DLLAPI typedef boost::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: ANALYSIS/OPENSWATH/OPENSWATHALGO/DATAACCESS/DataStructures.h:77
std::vector< DatumSingleton > Datavector
Definition: CachedmzML.h:78
void addSpectrum(const MSSpectrum< PeakT > &spectrum)
adds a spectrum to the list
Definition: MSExperiment.h:738
#define MAGIC_NUMBER
Definition: CachedmzML.h:48
void readSpectrum_(SpectrumType &spectrum, std::ifstream &ifs) const
Definition: CachedmzML.h:346
void setChromatograms(const std::vector< MSChromatogram< ChromatogramPeakType > > &chromatograms)
sets the chromatogram list
Definition: MSExperiment.h:756
void writeSpectrum_(const SpectrumType &spectrum, std::ofstream &ofs)
Definition: CachedmzML.h:387
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: ChromatogramPeak.h:108
void setRT(DoubleReal rt)
Sets the absolute retention time (in seconds)
Definition: MSSpectrum.h:221
A class that uses on-disk caching to read and write spectra and chromatograms.
Definition: CachedmzML.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
static void readChromatogramFast(OpenSwath::BinaryDataArrayPtr data1, OpenSwath::BinaryDataArrayPtr data2, std::ifstream &ifs)
fast access without copying
Definition: CachedmzML.h:306
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:809
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:56
static void readSpectrumFast(OpenSwath::BinaryDataArrayPtr data1, OpenSwath::BinaryDataArrayPtr data2, std::ifstream &ifs, int ms_level, double rt)
fast access without copying
Definition: CachedmzML.h:290
void readSpectrum_(Datavector &data1, Datavector &data2, std::ifstream &ifs, int &ms_level, double &rt) const
Definition: CachedmzML.h:321
double dbl_field_
Definition: CachedmzML.h:64
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:55
int int_field_
Definition: CachedmzML.h:63
void readChromatogram_(Datavector &data1, Datavector &data2, std::ifstream &ifs) const
Definition: CachedmzML.h:335
DoubleReal getRT() const
Definition: MSSpectrum.h:215
std::vector< Size > chrom_index_
Definition: CachedmzML.h:430
~CachedmzML()
Default destructor.
Definition: CachedmzML.h:89
void writeMetadata(MapType exp, String out_meta)
Write only the meta data of an MSExperiment.
Definition: CachedmzML.h:268
const std::vector< MSChromatogram< ChromatogramPeakType > > & getChromatograms() const
returns the chromatogram list
Definition: MSExperiment.h:768

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:11 using doxygen 1.8.5