kmp.tpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include <map>
00031 #include <assert.h>
00032 #include <claw/it_index.hpp>
00033
00034
00043 template<class RandomIterator>
00044 unsigned int
00045 claw::text::kmp<RandomIterator>::
00046 common_prefix_length( const RandomIterator begin_1,
00047 const RandomIterator begin_2,
00048 const RandomIterator end_1, const RandomIterator end_2
00049 ) const
00050 {
00051 unsigned int count = 0;
00052 RandomIterator it_1 = begin_1, it_2 = begin_2;
00053 bool quit = false;
00054
00055 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
00056 if ( *it_1 == *it_2 )
00057 {
00058 ++it_1;
00059 ++it_2;
00060 ++count;
00061 }
00062 else
00063 quit = true;
00064
00065 return count;
00066 }
00067
00068
00076 template<class RandomIterator>
00077 void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
00078 const RandomIterator end,
00079 std::map<unsigned int,unsigned int>& out) const
00080 {
00081
00082 claw::it_index<RandomIterator> it_r(begin);
00083 claw::it_index<RandomIterator> it_l(begin);
00084 claw::it_index<RandomIterator> it_k(begin);
00085 unsigned int z;
00086
00087 for (++it_k; it_k!=end; ++it_k)
00088 {
00089 if (it_k > it_r)
00090 {
00091 z = common_prefix_length(begin, it_k, end, end);
00092
00093 if ( z > 0 )
00094 {
00095 out[it_k] = z;
00096 it_l = it_k;
00097 it_r = it_k.operator+(z) - 1;
00098 }
00099 }
00100 else
00101 {
00102 unsigned int k_bis = it_k - it_l;
00103 claw::it_index<RandomIterator> it_b(it_r - it_k);
00104
00105 if ( out.find(k_bis) == out.end() )
00106 z = 0;
00107 else
00108 z = out[k_bis];
00109
00110 if ( z <= (unsigned int)it_b )
00111 {
00112 if ( z > 0 )
00113 out[it_k] = z;
00114 }
00115 else
00116 {
00117 claw::it_index<RandomIterator> it_q = it_r + 1;
00118
00119 it_q += common_prefix_length(it_q, it_b+1, end, end);
00120
00121 out[it_k] = it_q - it_k;
00122 it_r = it_q - 1;
00123 it_l = it_k;
00124 }
00125 }
00126 }
00127 }
00128
00129
00137 template<class RandomIterator>
00138 void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
00139 const RandomIterator end,
00140 std::map<unsigned int, unsigned int>& out) const
00141 {
00142 std::map<unsigned int, unsigned int> z;
00143 unsigned int j;
00144
00145
00146 z_boxes(begin, end, z);
00147
00148
00149 j=end-begin;
00150
00151 do
00152 {
00153 --j;
00154 if (z.find(j) != z.end())
00155 out[j + z[j] - 1] = z[j];
00156 }
00157 while (j!=0);
00158 }
00159
00160
00171 template<class RandomIterator>
00172 template<class UnaryPredicate>
00173 void claw::text::kmp<RandomIterator>::operator()
00174 (const RandomIterator pattern_begin, const RandomIterator pattern_end,
00175 const RandomIterator text_begin, const RandomIterator text_end,
00176 UnaryPredicate& action ) const
00177 {
00178 std::map<unsigned int, unsigned int> spi;
00179 claw::it_index<RandomIterator> it_p(pattern_begin,1);
00180 claw::it_index<RandomIterator> it_t(text_begin,1);
00181 bool stop = false;
00182
00183 const int pattern_length = pattern_end - pattern_begin;
00184 const int text_length = text_end - text_begin;
00185
00186 assert(pattern_begin != pattern_end);
00187
00188 spi_prime(pattern_begin, pattern_end, spi);
00189
00190 unsigned int i = 0;
00191
00192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
00193 {
00194 unsigned int common;
00195
00196 common = common_prefix_length(it_p, it_t, pattern_end, text_end);
00197 i += common;
00198 it_p += common;
00199 it_t += common;
00200
00201 if (it_p == 1)
00202 ++it_t;
00203 else
00204 {
00205 if ( (int)it_p == pattern_length+1 )
00206 stop = !action( (int)it_t - pattern_length-1 );
00207
00208 i = spi[i-1];
00209 it_p.set(pattern_begin+i, i+1);
00210 }
00211 }
00212 }