Exact pattern finding with the Knuth-Morris-Pratt's algorithm. More...
#include <kmp.hpp>
Public Member Functions | |
template<class UnaryPredicate > | |
void | operator() (const RandomIterator pattern_begin, const RandomIterator pattern_end, const RandomIterator text_begin, const RandomIterator text_end, UnaryPredicate &action) const |
Pattern matching with the Knuth-Morris-Pratt's algorithm. | |
Private Member Functions | |
unsigned int | common_prefix_length (const RandomIterator begin_1, const RandomIterator begin_2, const RandomIterator end_1, const RandomIterator end_2) const |
Calculate the length of the longest common prefix between two patterns. | |
void | z_boxes (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const |
Preprocessing. Calculate the pattern's z-boxes. | |
void | spi_prime (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const |
Preprocessing. Calculate the pattern's spi' values. |
Exact pattern finding with the Knuth-Morris-Pratt's algorithm.
Definition at line 44 of file kmp.hpp.
unsigned int claw::text::kmp< RandomIterator >::common_prefix_length | ( | const RandomIterator | begin_1, | |
const RandomIterator | begin_2, | |||
const RandomIterator | end_1, | |||
const RandomIterator | end_2 | |||
) | const [inline, private] |
Calculate the length of the longest common prefix between two patterns.
begin_1 | Iterator on the first item in the first pattern. | |
begin_2 | Iterator on the first item in the second pattern. | |
end_1 | Iterator after the last item in the first pattern. | |
end_2 | Iterator after the last item in the second pattern. |
Definition at line 46 of file kmp.tpp.
Referenced by claw::text::kmp< RandomIterator >::z_boxes().
00050 { 00051 unsigned int count = 0; 00052 RandomIterator it_1 = begin_1, it_2 = begin_2; 00053 bool quit = false; 00054 00055 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit ) 00056 if ( *it_1 == *it_2 ) 00057 { 00058 ++it_1; 00059 ++it_2; 00060 ++count; 00061 } 00062 else 00063 quit = true; 00064 00065 return count; 00066 } // common_prefix_length()
void claw::text::kmp< RandomIterator >::operator() | ( | const RandomIterator | pattern_begin, | |
const RandomIterator | pattern_end, | |||
const RandomIterator | text_begin, | |||
const RandomIterator | text_end, | |||
UnaryPredicate & | action | |||
) | const [inline] |
Pattern matching with the Knuth-Morris-Pratt's algorithm.
pattern_begin | Iterator on the first item in the pattern. | |
pattern_end | Iterator after the last item in the pattern. | |
text_begin | Iterator on the first item in the text. | |
text_end | Iterator after the last item in the text. | |
action | Predicate called with the last found position for the pattern. |
Definition at line 174 of file kmp.tpp.
References claw::it_index< T >::set().
00177 { 00178 std::map<unsigned int, unsigned int> spi; // pattern's spi' 00179 claw::it_index<RandomIterator> it_p(pattern_begin,1); 00180 claw::it_index<RandomIterator> it_t(text_begin,1); 00181 bool stop = false; // result of the last call to the predicate action 00182 00183 const int pattern_length = pattern_end - pattern_begin; 00184 const int text_length = text_end - text_begin; 00185 00186 assert(pattern_begin != pattern_end); 00187 00188 spi_prime(pattern_begin, pattern_end, spi); 00189 00190 unsigned int i = 0; 00191 00192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop ) 00193 { 00194 unsigned int common; 00195 00196 common = common_prefix_length(it_p, it_t, pattern_end, text_end); 00197 i += common; 00198 it_p += common; 00199 it_t += common; 00200 00201 if (it_p == 1) 00202 ++it_t; 00203 else 00204 { 00205 if ( (int)it_p == pattern_length+1 ) 00206 stop = !action( (int)it_t - pattern_length-1 ); 00207 00208 i = spi[i-1]; 00209 it_p.set(pattern_begin+i, i+1); 00210 } 00211 } 00212 } // operator()
void claw::text::kmp< RandomIterator >::spi_prime | ( | const RandomIterator | begin, | |
const RandomIterator | end, | |||
std::map< unsigned int, unsigned int > & | out | |||
) | const [inline, private] |
Preprocessing. Calculate the pattern's spi' values.
begin | Iterator on the first item in the pattern. | |
end | Iterator after the last item in the pattern. | |
out | (out) Calculated spi'. |
Definition at line 138 of file kmp.tpp.
References claw::text::kmp< RandomIterator >::z_boxes().
00141 { 00142 std::map<unsigned int, unsigned int> z; // pattern's Z-boxes. 00143 unsigned int j; // loop index. 00144 00145 // set Z-boxes 00146 z_boxes(begin, end, z); 00147 00148 // calculates spi' (from end to begining) 00149 j=end-begin; 00150 00151 do 00152 { 00153 --j; 00154 if (z.find(j) != z.end()) 00155 out[j + z[j] - 1] = z[j]; 00156 } 00157 while (j!=0); 00158 } // spi_prime()
void claw::text::kmp< RandomIterator >::z_boxes | ( | const RandomIterator | begin, | |
const RandomIterator | end, | |||
std::map< unsigned int, unsigned int > & | out | |||
) | const [inline, private] |
Preprocessing. Calculate the pattern's z-boxes.
begin | Iterator on the first item in the pattern. | |
end | Iterator after the last item in the pattern. | |
out | (out) Calculated z-boxes. |
Definition at line 77 of file kmp.tpp.
References claw::text::kmp< RandomIterator >::common_prefix_length().
Referenced by claw::text::kmp< RandomIterator >::spi_prime().
00080 { 00081 // right and left bounds of the current item's Z-box. 00082 claw::it_index<RandomIterator> it_r(begin); 00083 claw::it_index<RandomIterator> it_l(begin); 00084 claw::it_index<RandomIterator> it_k(begin); // increment on the items 00085 unsigned int z; // le Zi of the current position 00086 00087 for (++it_k; it_k!=end; ++it_k) 00088 { 00089 if (it_k > it_r) 00090 { 00091 z = common_prefix_length(begin, it_k, end, end); 00092 00093 if ( z > 0 ) // set the Z-box 00094 { 00095 out[it_k] = z; 00096 it_l = it_k; 00097 it_r = it_k.operator+(z) - 1; 00098 } 00099 } 00100 else /* k <= r */ 00101 { 00102 unsigned int k_bis = it_k - it_l; 00103 claw::it_index<RandomIterator> it_b(it_r - it_k); 00104 00105 if ( out.find(k_bis) == out.end() ) 00106 z = 0; 00107 else 00108 z = out[k_bis]; 00109 00110 if ( z <= (unsigned int)it_b ) 00111 { 00112 if ( z > 0 ) 00113 out[it_k] = z; 00114 } 00115 else 00116 { 00117 claw::it_index<RandomIterator> it_q = it_r + 1; 00118 00119 it_q += common_prefix_length(it_q, it_b+1, end, end); 00120 00121 out[it_k] = it_q - it_k; 00122 it_r = it_q - 1; 00123 it_l = it_k; 00124 } 00125 } 00126 } 00127 } // z_boxes()