Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
FuzzyStringComparator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Stephan Aiche $
32 // $Authors: Clemens Groepl, Stephan Aiche $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_CONCEPT_FUZZYSTRINGCOMPARATOR_H
36 #define OPENMS_CONCEPT_FUZZYSTRINGCOMPARATOR_H
37 
38 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include <cstdlib> // for strtod()
43 #include <fstream>
44 #include <iostream>
45 #include <cctype> // for isspace()
46 #include <limits> // for NaN
47 #include <sstream>
48 #include <map>
49 
50 namespace OpenMS
51 {
52  namespace Internal
53  {
54  namespace ClassTest
55  {
56  void OPENMS_DLLAPI
57  testStringSimilar(const char * file, int line,
58  const std::string & string_1,
59  const char * string_1_stringified,
60  const std::string & string_2,
61  const char * string_2_stringified);
62  bool OPENMS_DLLAPI
63  isFileSimilar(const std::string &, const std::string &);
64  }
65  }
66 
71  class OPENMS_DLLAPI FuzzyStringComparator
72  {
73 
74  friend void OPENMS_DLLAPI
76  const char * file,
77  int line,
78  const std::string & string_1,
79  const char * string_1_stringified,
80  const std::string & string_2,
81  const char * string_2_stringified);
82  friend bool OPENMS_DLLAPI
83  Internal::ClassTest::isFileSimilar(const std::string &,
84  const std::string &);
85 
88  {
89  };
90 
91 public:
92 
94 
95 
98 
100  virtual
102 
105 
108  operator=(const FuzzyStringComparator & rhs);
109 
111 
113  const double &
115  {
116  return ratio_max_allowed_;
117  }
118 
120  void
121  setAcceptableRelative(const double rhs)
122  {
123  this->ratio_max_allowed_ = rhs;
124  if (ratio_max_allowed_ < 1.0)
125  ratio_max_allowed_ = 1
126  / ratio_max_allowed_;
127 
128  }
129 
131  const double &
133  {
134  return absdiff_max_allowed_;
135  }
136 
138  void
139  setAcceptableAbsolute(const double rhs)
140  {
141  this->absdiff_max_allowed_ = rhs;
142  if (absdiff_max_allowed_ < 0.0)
143  absdiff_max_allowed_
144  = -absdiff_max_allowed_;
145  }
146 
148  const StringList &
149  getWhitelist() const
150  {
151  return whitelist_;
152  }
153 
155  StringList &
157  {
158  return whitelist_;
159  }
160 
162  void
164  {
165  whitelist_ = rhs;
166  }
167 
176  const int &
178  {
179  return verbose_level_;
180  }
181 
190  void
191  setVerboseLevel(const int rhs)
192  {
193  this->verbose_level_ = rhs;
194  }
195 
198  const int &
199  getTabWidth() const
200  {
201  return tab_width_;
202  }
203 
206  void
207  setTabWidth(const int rhs)
208  {
209  this->tab_width_ = rhs;
210  }
211 
214  const int &
216  {
217  return first_column_;
218  }
219 
222  void
223  setFirstColumn(const int rhs)
224  {
225  this->first_column_ = rhs;
226  }
227 
233  std::ostream &
235  {
236  return *log_dest_;
237  }
238 
248  void
249  setLogDestination(std::ostream & rhs)
250  {
251  this->log_dest_ = &rhs;
252  }
253 
260  bool
261  compareStrings(std::string const & lhs, std::string const & rhs);
262 
270  bool
271  compareStreams(std::istream & input_1, std::istream & input_2);
272 
286  bool
287  compareFiles(const std::string & filename_1,
288  const std::string & filename_2);
289 
290 protected:
291 
299  bool
300  compareLines_(std::string const & line_str_1,
301  std::string const & line_str_2);
302 
304  void
305  reportSuccess_() const;
306 
309  void
310  reportFailure_(char const * const message) const;
311 
313  void writeWhitelistCases_(const std::string & prefix) const;
314 
317  void readNextLine_(std::istream & input_stream, std::string & line_string, int & line_number) const;
318 
320  bool openInputFileStream_(const std::string & filename, std::ifstream & input_stream) const;
321 
323  std::ostream * log_dest_;
324 
326  std::string input_1_name_;
328  std::string input_2_name_;
329 
331  struct InputLine
332  {
333  std::stringstream line_;
334  std::ios::pos_type line_position_;
335 
337  line_()
338  {
339  }
340 
342  void setToString(const std::string & s)
343  {
344  line_.str(s);
345  line_.seekp(0);
346  line_.clear();
347  line_.unsetf(std::ios::skipws);
348 
349  line_position_ = line_.tellg();
350  }
351 
354  {
355  line_position_ = (Int(line_.tellg()) != -1 ? line_.tellg() : std::ios::pos_type(line_.str().length())); // save current reading position
356  }
357 
360  {
361  line_.clear(); // reset status
362  line_.seekg(line_position_); // rewind to saved position
363  }
364 
372  bool ok() const
373  {
374  return !line_.fail(); // failbit AND badbit are both NOT set; using fail() seems the only portable solution for both C++98 and C++11
375  // operator bool() (C++11 only) and operator void*() (C++98 only) are both not very sexy since they are not "safe bool idiomic" and would require
376  // a macro here... So we use a real function name (both internally and externally)
377  }
378  };
379 
382 
385 
388 
389  std::string line_str_1_max_;
390  std::string line_str_2_max_;
391 
394 
396  double ratio_max_;
397 
400 
402  double absdiff_max_;
403 
406  {
407  double number;
408  unsigned char letter;
409  bool is_number;
410  bool is_space;
411 
413  number(0),
414  letter(0),
415  is_number(false),
416  is_space(false)
417  {}
418 
420  void reset()
421  {
422  is_number = false;
423  is_space = false;
424  letter = '\0';
425  number = std::numeric_limits<double>::quiet_NaN();
426  }
427 
429  void fillFromInputLine(InputLine & input_line)
430  {
431  // first reset all internal variables so we do not mess with
432  // old values
433  reset();
434 
435  input_line.updatePosition();
436  input_line.line_ >> letter; // read letter
437  if ((is_space = (isspace(letter) != 0))) // is whitespace?
438  {
439  input_line.line_ >> std::ws; // skip over further whitespace
440  }
441  else
442  {
443  input_line.seekGToSavedPosition();
444  if ((is_number = (bool(input_line.line_ >> number)))) // is a number? (explicit bool op for C11)
445  {
446  }
447  else
448  {
449  input_line.seekGToSavedPosition();
450  input_line.line_ >> letter; // read letter
451  }
452  }
453  }
454 
455  };
456 
461 
463  struct PrefixInfo_
464  {
468 
469  PrefixInfo_(const InputLine & input_line, const int tab_width_, const int first_column_) :
470  prefix(input_line.line_.str()), line_column(0)
471  {
472  prefix = prefix.prefix(size_t(input_line.line_position_));
473  prefix_whitespaces = prefix;
474  for (String::iterator iter = prefix_whitespaces.begin(); iter != prefix_whitespaces.end(); ++iter)
475  {
476  if (*iter != '\t')
477  {
478  * iter = ' ';
479  ++line_column;
480  }
481  else
482  {
483  line_column = (line_column / tab_width_ + 1) * tab_width_;
484  }
485  }
486  line_column += first_column_;
487  }
488 
489  };
490 
492 
496 
501 
504 
506  std::map<String, UInt> whitelist_cases_;
507 
508  }; // class FuzzyStringComparator
509 
510 } //namespace OpenMS
511 
512 #endif //OPENMS_CONCEPT_FUZZYSTRINGCOMPARATOR_H
StreamElement_ element_2_
Stores information about characters, numbers, and whitespaces loaded from the second input stream...
Definition: FuzzyStringComparator.h:460
int verbose_level_
Definition: FuzzyStringComparator.h:493
void testStringSimilar(const char *file, int line, const std::string &string_1, const char *string_1_stringified, const std::string &string_2, const char *string_2_stringified)
Compare strings using absdiff_max_allowed and ratio_max_allowed.
A more convenient string class.
Definition: String.h:56
Internal exception class.
Definition: FuzzyStringComparator.h:87
Stores information about the current input line (i.e., stream for the line and the current position i...
Definition: FuzzyStringComparator.h:331
double ratio_max_allowed_
Maximum ratio of numbers allowed, see ratio_max_.
Definition: FuzzyStringComparator.h:393
Stores information about characters, numbers, and whitespaces loaded from the InputStream.
Definition: FuzzyStringComparator.h:405
void seekGToSavedPosition()
Resets the stream to the last saved position.
Definition: FuzzyStringComparator.h:359
double absdiff_max_
Maximum difference of numbers observed so far, see absdiff_max_allowed_.
Definition: FuzzyStringComparator.h:402
int line_column
Definition: FuzzyStringComparator.h:467
double absdiff_max_allowed_
Maximum absolute difference of numbers allowed, see absdiff_max_.
Definition: FuzzyStringComparator.h:399
OpenMS::String prefix
Definition: FuzzyStringComparator.h:465
bool use_prefix_
use a prefix when reporting
Definition: FuzzyStringComparator.h:503
const int & getVerboseLevel() const
verbose level
Definition: FuzzyStringComparator.h:177
std::ostream * log_dest_
Log and results output goes here.
Definition: FuzzyStringComparator.h:323
void fillFromInputLine(InputLine &input_line)
Read the next element from an InputLine and update the InputLine accordingly.
Definition: FuzzyStringComparator.h:429
unsigned char letter
Definition: FuzzyStringComparator.h:408
void setTabWidth(const int rhs)
set tab width (for column numbers)
Definition: FuzzyStringComparator.h:207
void setLogDestination(std::ostream &rhs)
Log output is written to this destination.
Definition: FuzzyStringComparator.h:249
std::ios::pos_type line_position_
Definition: FuzzyStringComparator.h:334
bool is_space
Definition: FuzzyStringComparator.h:410
Fuzzy comparison of strings, tolerates numeric differences.
Definition: FuzzyStringComparator.h:71
std::string line_str_2_max_
Definition: FuzzyStringComparator.h:390
std::ostream & getLogDestination() const
Log output is written to this destination.
Definition: FuzzyStringComparator.h:234
int line_num_2_
Definition: FuzzyStringComparator.h:384
PrefixInfo_(const InputLine &input_line, const int tab_width_, const int first_column_)
Definition: FuzzyStringComparator.h:469
StringList & getWhitelist()
White list. If both lines contain the same element from this list, they are skipped over...
Definition: FuzzyStringComparator.h:156
std::map< String, UInt > whitelist_cases_
Definition: FuzzyStringComparator.h:506
const double & getAcceptableRelative() const
Acceptable relative error (a number >= 1.0)
Definition: FuzzyStringComparator.h:114
void setAcceptableAbsolute(const double rhs)
Acceptable absolute difference (a number >= 0.0)
Definition: FuzzyStringComparator.h:139
void setVerboseLevel(const int rhs)
verbose level
Definition: FuzzyStringComparator.h:191
bool is_number
Definition: FuzzyStringComparator.h:409
void setToString(const std::string &s)
Initialize the input line to the passed string.
Definition: FuzzyStringComparator.h:342
OpenMS::String prefix_whitespaces
Definition: FuzzyStringComparator.h:466
double number
Definition: FuzzyStringComparator.h:407
int first_column_
Definition: FuzzyStringComparator.h:495
int tab_width_
Definition: FuzzyStringComparator.h:494
double ratio_max_
Maximum ratio of numbers observed so far, see ratio_max_allowed_.
Definition: FuzzyStringComparator.h:396
int line_num_1_
Definition: FuzzyStringComparator.h:383
StreamElement_()
Definition: FuzzyStringComparator.h:412
StreamElement_ element_1_
Stores information about characters, numbers, and whitespaces loaded from the first input stream...
Definition: FuzzyStringComparator.h:458
void reset()
reset all elements of the element to default value
Definition: FuzzyStringComparator.h:420
const int & getFirstColumn() const
get first column (for column numbers)
Definition: FuzzyStringComparator.h:215
StringList whitelist_
Definition: FuzzyStringComparator.h:505
InputLine input_line_2_
Definition: FuzzyStringComparator.h:381
bool is_status_success_
Has comparison been successful so far? Note: this flag is changed in reportFailure_().
Definition: FuzzyStringComparator.h:500
void updatePosition()
Save current position of the stream.
Definition: FuzzyStringComparator.h:353
void setAcceptableRelative(const double rhs)
Acceptable relative error (a number >= 1.0)
Definition: FuzzyStringComparator.h:121
std::string input_2_name_
Name of second input e.g., filename.
Definition: FuzzyStringComparator.h:328
void setFirstColumn(const int rhs)
set first column (for column numbers)
Definition: FuzzyStringComparator.h:223
InputLine()
Definition: FuzzyStringComparator.h:336
String list.
Definition: StringList.h:56
int line_num_1_max_
Definition: FuzzyStringComparator.h:386
std::string line_str_1_max_
Definition: FuzzyStringComparator.h:389
void setWhitelist(const StringList &rhs)
White list. If both lines contain the same element from this list, they are skipped over...
Definition: FuzzyStringComparator.h:163
std::stringstream line_
Definition: FuzzyStringComparator.h:333
const StringList & getWhitelist() const
White list. If both lines contain the same element from this list, they are skipped over...
Definition: FuzzyStringComparator.h:149
bool isFileSimilar(const std::string &filename_1, const std::string &filename_2)
Compare files using absdiff_max_allowed and ratio_max_allowed.
bool is_absdiff_small_
Definition: FuzzyStringComparator.h:491
const double & getAcceptableAbsolute() const
Acceptable absolute difference (a number >= 0.0)
Definition: FuzzyStringComparator.h:132
Wrapper for the prefix information computed for the failure report.
Definition: FuzzyStringComparator.h:463
int line_num_2_max_
Definition: FuzzyStringComparator.h:387
bool ok() const
Definition: FuzzyStringComparator.h:372
int Int
Signed integer type.
Definition: Types.h:100
InputLine input_line_1_
Definition: FuzzyStringComparator.h:380
const int & getTabWidth() const
get tab width (for column numbers)
Definition: FuzzyStringComparator.h:199
std::string input_1_name_
Name of first input e.g., filename.
Definition: FuzzyStringComparator.h:326

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:14 using doxygen 1.8.5