claw::text::kmp< RandomIterator > Class Template Reference

Exact pattern finding with the Knuth-Morris-Pratt's algorithm. More...

#include <kmp.hpp>

List of all members.

Public Member Functions

template<class UnaryPredicate >
void operator() (const RandomIterator pattern_begin, const RandomIterator pattern_end, const RandomIterator text_begin, const RandomIterator text_end, UnaryPredicate &action) const
 Pattern matching with the Knuth-Morris-Pratt's algorithm.

Private Member Functions

unsigned int common_prefix_length (const RandomIterator begin_1, const RandomIterator begin_2, const RandomIterator end_1, const RandomIterator end_2) const
 Calculate the length of the longest common prefix between two patterns.
void z_boxes (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const
 Preprocessing. Calculate the pattern's z-boxes.
void spi_prime (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const
 Preprocessing. Calculate the pattern's spi' values.

Detailed Description

template<class RandomIterator>
class claw::text::kmp< RandomIterator >

Exact pattern finding with the Knuth-Morris-Pratt's algorithm.

Author:
Julien Jorge

Definition at line 44 of file kmp.hpp.


Member Function Documentation

template<class RandomIterator >
unsigned int claw::text::kmp< RandomIterator >::common_prefix_length ( const RandomIterator  begin_1,
const RandomIterator  begin_2,
const RandomIterator  end_1,
const RandomIterator  end_2 
) const [inline, private]

Calculate the length of the longest common prefix between two patterns.

Parameters:
begin_1 Iterator on the first item in the first pattern.
begin_2 Iterator on the first item in the second pattern.
end_1 Iterator after the last item in the first pattern.
end_2 Iterator after the last item in the second pattern.

Definition at line 46 of file kmp.tpp.

Referenced by claw::text::kmp< RandomIterator >::z_boxes().

00050 {
00051   unsigned int count = 0;
00052   RandomIterator it_1 = begin_1, it_2 = begin_2;
00053   bool quit = false;
00054 
00055   while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
00056     if ( *it_1 == *it_2 )
00057       {
00058         ++it_1;
00059         ++it_2;
00060         ++count;
00061       }
00062     else
00063       quit = true;
00064 
00065   return count;
00066 } // common_prefix_length()

template<class RandomIterator >
template<class UnaryPredicate >
void claw::text::kmp< RandomIterator >::operator() ( const RandomIterator  pattern_begin,
const RandomIterator  pattern_end,
const RandomIterator  text_begin,
const RandomIterator  text_end,
UnaryPredicate &  action 
) const [inline]

Pattern matching with the Knuth-Morris-Pratt's algorithm.

Parameters:
pattern_begin Iterator on the first item in the pattern.
pattern_end Iterator after the last item in the pattern.
text_begin Iterator on the first item in the text.
text_end Iterator after the last item in the text.
action Predicate called with the last found position for the pattern.
Remarks:
Exits if action return false.
Precondition:
pattern_begin != pattern_end

Definition at line 174 of file kmp.tpp.

References claw::it_index< T >::set().

00177 {
00178   std::map<unsigned int, unsigned int> spi; // pattern's spi'
00179   claw::it_index<RandomIterator> it_p(pattern_begin,1);
00180   claw::it_index<RandomIterator> it_t(text_begin,1);
00181   bool stop = false;    // result of the last call to the predicate action
00182 
00183   const int pattern_length = pattern_end - pattern_begin;
00184   const int text_length    = text_end    - text_begin;
00185 
00186   assert(pattern_begin != pattern_end);
00187 
00188   spi_prime(pattern_begin, pattern_end, spi);
00189 
00190   unsigned int i = 0;
00191 
00192   while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
00193     {
00194       unsigned int common;
00195 
00196       common = common_prefix_length(it_p, it_t, pattern_end, text_end);
00197       i += common;
00198       it_p += common;
00199       it_t += common;
00200 
00201       if (it_p == 1)
00202   ++it_t;
00203       else
00204         {
00205     if ( (int)it_p == pattern_length+1 )
00206       stop = !action( (int)it_t - pattern_length-1 );
00207 
00208     i = spi[i-1];
00209     it_p.set(pattern_begin+i, i+1);
00210         }
00211     }
00212 } // operator()

template<class RandomIterator >
void claw::text::kmp< RandomIterator >::spi_prime ( const RandomIterator  begin,
const RandomIterator  end,
std::map< unsigned int, unsigned int > &  out 
) const [inline, private]

Preprocessing. Calculate the pattern's spi' values.

Parameters:
begin Iterator on the first item in the pattern.
end Iterator after the last item in the pattern.
out (out) Calculated spi'.
Remarks:
out contains only non-zero spi'.

Definition at line 138 of file kmp.tpp.

References claw::text::kmp< RandomIterator >::z_boxes().

00141 {
00142   std::map<unsigned int, unsigned int> z;    // pattern's Z-boxes.
00143   unsigned int j;                            // loop index.
00144 
00145   // set Z-boxes
00146   z_boxes(begin, end, z);
00147 
00148   // calculates spi' (from end to begining)
00149   j=end-begin;
00150 
00151   do
00152     {
00153       --j;
00154       if (z.find(j) != z.end())
00155         out[j + z[j] - 1] = z[j];
00156     }
00157   while (j!=0);
00158 } // spi_prime()

template<class RandomIterator >
void claw::text::kmp< RandomIterator >::z_boxes ( const RandomIterator  begin,
const RandomIterator  end,
std::map< unsigned int, unsigned int > &  out 
) const [inline, private]

Preprocessing. Calculate the pattern's z-boxes.

Parameters:
begin Iterator on the first item in the pattern.
end Iterator after the last item in the pattern.
out (out) Calculated z-boxes.
Remarks:
out contains only non-zero z-boxes.

Definition at line 77 of file kmp.tpp.

References claw::text::kmp< RandomIterator >::common_prefix_length().

Referenced by claw::text::kmp< RandomIterator >::spi_prime().

00080 {
00081   // right and left bounds of the current item's Z-box.
00082   claw::it_index<RandomIterator> it_r(begin);
00083   claw::it_index<RandomIterator> it_l(begin); 
00084   claw::it_index<RandomIterator> it_k(begin);      // increment on the items
00085   unsigned int z;                   // le Zi of the current position
00086 
00087   for (++it_k; it_k!=end; ++it_k)
00088     {
00089       if (it_k > it_r)
00090         {
00091           z = common_prefix_length(begin, it_k, end, end);
00092                   
00093           if ( z > 0 ) // set the Z-box
00094             {
00095               out[it_k] = z;
00096               it_l = it_k;
00097               it_r = it_k.operator+(z) - 1;
00098             }
00099         }
00100       else /* k <= r */
00101         {
00102           unsigned int k_bis = it_k - it_l;
00103     claw::it_index<RandomIterator> it_b(it_r - it_k);
00104 
00105           if ( out.find(k_bis) == out.end() )
00106             z = 0;
00107           else
00108             z = out[k_bis];
00109 
00110           if ( z <= (unsigned int)it_b )
00111             {
00112               if ( z > 0 )
00113                 out[it_k] = z;
00114             }
00115           else
00116             {
00117               claw::it_index<RandomIterator> it_q = it_r + 1;
00118                           
00119               it_q += common_prefix_length(it_q, it_b+1, end, end);
00120 
00121               out[it_k] = it_q - it_k;
00122               it_r = it_q - 1;
00123               it_l = it_k;
00124             }
00125         }
00126     }
00127 } // z_boxes()


The documentation for this class was generated from the following files:

Generated on 9 Nov 2009 for CLAW Library (a C++ Library Absolutely Wonderful) by  doxygen 1.6.1