kmp.tpp

Go to the documentation of this file.
00001 /*
00002   CLAW - a C++ Library Absolutely Wonderful
00003 
00004   CLAW is a free library without any particular aim but being useful to 
00005   anyone.
00006 
00007   Copyright (C) 2005-2008 Julien Jorge
00008 
00009   This library is free software; you can redistribute it and/or
00010   modify it under the terms of the GNU Lesser General Public
00011   License as published by the Free Software Foundation; either
00012   version 2.1 of the License, or (at your option) any later version.
00013 
00014   This library is distributed in the hope that it will be useful,
00015   but WITHOUT ANY WARRANTY; without even the implied warranty of
00016   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017   Lesser General Public License for more details.
00018 
00019   You should have received a copy of the GNU Lesser General Public
00020   License along with this library; if not, write to the Free Software
00021   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00022 
00023   contact: julien_jorge@yahoo.fr
00024 */
00030 #include <map>
00031 #include <assert.h>
00032 #include <claw/it_index.hpp>
00033 
00034 /*---------------------------------------------------------------------------*/
00043 template<class RandomIterator>
00044 unsigned int 
00045 claw::text::kmp<RandomIterator>::
00046 common_prefix_length( const RandomIterator begin_1,
00047                       const RandomIterator begin_2,
00048                       const RandomIterator end_1, const RandomIterator end_2 
00049                       ) const
00050 {
00051   unsigned int count = 0;
00052   RandomIterator it_1 = begin_1, it_2 = begin_2;
00053   bool quit = false;
00054 
00055   while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
00056     if ( *it_1 == *it_2 )
00057       {
00058         ++it_1;
00059         ++it_2;
00060         ++count;
00061       }
00062     else
00063       quit = true;
00064 
00065   return count;
00066 } // common_prefix_length()
00067 
00068 /*---------------------------------------------------------------------------*/
00076 template<class RandomIterator>
00077 void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
00078                                               const RandomIterator end,
00079                                               std::map<unsigned int,unsigned int>& out) const
00080 {
00081   // right and left bounds of the current item's Z-box.
00082   claw::it_index<RandomIterator> it_r(begin);
00083   claw::it_index<RandomIterator> it_l(begin); 
00084   claw::it_index<RandomIterator> it_k(begin);      // increment on the items
00085   unsigned int z;                   // le Zi of the current position
00086 
00087   for (++it_k; it_k!=end; ++it_k)
00088     {
00089       if (it_k > it_r)
00090         {
00091           z = common_prefix_length(begin, it_k, end, end);
00092                   
00093           if ( z > 0 ) // set the Z-box
00094             {
00095               out[it_k] = z;
00096               it_l = it_k;
00097               it_r = it_k.operator+(z) - 1;
00098             }
00099         }
00100       else /* k <= r */
00101         {
00102           unsigned int k_bis = it_k - it_l;
00103     claw::it_index<RandomIterator> it_b(it_r - it_k);
00104 
00105           if ( out.find(k_bis) == out.end() )
00106             z = 0;
00107           else
00108             z = out[k_bis];
00109 
00110           if ( z <= (unsigned int)it_b )
00111             {
00112               if ( z > 0 )
00113                 out[it_k] = z;
00114             }
00115           else
00116             {
00117               claw::it_index<RandomIterator> it_q = it_r + 1;
00118                           
00119               it_q += common_prefix_length(it_q, it_b+1, end, end);
00120 
00121               out[it_k] = it_q - it_k;
00122               it_r = it_q - 1;
00123               it_l = it_k;
00124             }
00125         }
00126     }
00127 } // z_boxes()
00128 
00129 /*---------------------------------------------------------------------------*/
00137 template<class RandomIterator>
00138 void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
00139                                                 const RandomIterator end,
00140                                                 std::map<unsigned int, unsigned int>& out) const
00141 {
00142   std::map<unsigned int, unsigned int> z;    // pattern's Z-boxes.
00143   unsigned int j;                            // loop index.
00144 
00145   // set Z-boxes
00146   z_boxes(begin, end, z);
00147 
00148   // calculates spi' (from end to begining)
00149   j=end-begin;
00150 
00151   do
00152     {
00153       --j;
00154       if (z.find(j) != z.end())
00155         out[j + z[j] - 1] = z[j];
00156     }
00157   while (j!=0);
00158 } // spi_prime()
00159 
00160 /*---------------------------------------------------------------------------*/
00171 template<class RandomIterator>
00172 template<class UnaryPredicate>
00173 void claw::text::kmp<RandomIterator>::operator()
00174   (const RandomIterator pattern_begin, const RandomIterator pattern_end,
00175    const RandomIterator text_begin, const RandomIterator text_end, 
00176    UnaryPredicate& action ) const
00177 {
00178   std::map<unsigned int, unsigned int> spi; // pattern's spi'
00179   claw::it_index<RandomIterator> it_p(pattern_begin,1);
00180   claw::it_index<RandomIterator> it_t(text_begin,1);
00181   bool stop = false;    // result of the last call to the predicate action
00182 
00183   const int pattern_length = pattern_end - pattern_begin;
00184   const int text_length    = text_end    - text_begin;
00185 
00186   assert(pattern_begin != pattern_end);
00187 
00188   spi_prime(pattern_begin, pattern_end, spi);
00189 
00190   unsigned int i = 0;
00191 
00192   while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
00193     {
00194       unsigned int common;
00195 
00196       common = common_prefix_length(it_p, it_t, pattern_end, text_end);
00197       i += common;
00198       it_p += common;
00199       it_t += common;
00200 
00201       if (it_p == 1)
00202   ++it_t;
00203       else
00204         {
00205     if ( (int)it_p == pattern_length+1 )
00206       stop = !action( (int)it_t - pattern_length-1 );
00207 
00208     i = spi[i-1];
00209     it_p.set(pattern_begin+i, i+1);
00210         }
00211     }
00212 } // operator()

Generated on 9 Nov 2009 for CLAW Library (a C++ Library Absolutely Wonderful) by  doxygen 1.6.1