00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef _MIMETIC_TOKENIZER_H_
00017 #define _MIMETIC_TOKENIZER_H_
00018 #include <iterator>
00019 #include <algorithm>
00020 #include <set>
00021 #include <string>
00022 #include <cstring>
00023
00024 namespace mimetic
00025 {
00026
/// Unary predicate answering "is this value a delimiter?".
///
/// The generic version keeps the delimiters in a std::set, so value_type
/// must be usable as a std::set key (copyable and ordered by operator<).
template<typename value_type>
struct IsDelim
{
    // Same nested typedefs std::unary_function<value_type,bool> used to
    // provide; that base class was deprecated in C++11 and removed in C++17.
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true when val is currently registered as a delimiter.
    bool operator()(const value_type& val) const
    {
        return m_delims.count(val) != 0;
    }

    /// Registers every element of cont as a delimiter.
    /// Delimiters registered earlier are kept: this call only adds.
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        setDelimList(cont.begin(), cont.end());
    }

    /// Registers every element of the range [bit, eit) as a delimiter.
    /// Delimiters registered earlier are kept: this call only adds.
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        for(; bit != eit; ++bit)
            m_delims.insert(*bit);
    }

    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }

    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Specialization for char: delimiters are stored in a 256-entry table
/// indexed by the character's unsigned byte value.
///
/// Unlike the generic version, setDelimList() here REPLACES the previous
/// delimiter list (the table is cleared first).
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Starts with an empty delimiter list so that operator() is well
    /// defined even if setDelimList() is never called.
    IsDelim()
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }

    /// Replaces the delimiter list with the characters of delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }

    /// Replaces the delimiter list with the characters in [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        for(; bit != eit; ++bit)
        {
            // Index by unsigned byte value: where plain char is signed,
            // bytes >= 0x80 are negative and would index before the table.
            m_lookup[static_cast<unsigned char>(*bit)] = 1;
        }
    }

    /// Registers a single delimiter, keeping the existing ones.
    void addDelim(char value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 1;
    }

    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(char value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 0;
    }

    /// Returns true when val is currently registered as a delimiter.
    /// Taking unsigned char makes a plain char argument map onto 0..255.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    char m_lookup[256];
};
00081
00082
00083
00084 template<class Iterator,typename value_type>
00085 class ItTokenizer
00086 {
00087 public:
00088 ItTokenizer(Iterator bit, Iterator eit)
00089 : m_bit(bit), m_eit(eit), m_tok_eit(bit)
00090 {
00091 }
00092 void setSource(Iterator bit, Iterator eit)
00093 {
00094 m_bit = bit;
00095 m_eit = eit;
00096 m_tok_eit = bit;
00097 }
00098 template<typename DelimCont>
00099 void setDelimList(const DelimCont& cont)
00100 {
00101 m_delimPred.setDelimList(cont);
00102 }
00103 template<typename It>
00104 void setDelimList(It bit, It eit)
00105 {
00106 m_delimPred.setDelimList(bit, eit);
00107 }
00108 template<typename DestCont>
00109 bool next(DestCont& dst)
00110 {
00111 dst.erase(dst.begin(), dst.end());
00112 if(m_tok_eit == m_eit)
00113 return false;
00114 m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
00115 m_matched = 0;
00116 if(m_tok_eit != m_eit)
00117 m_matched = *m_tok_eit;
00118 std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
00119 m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
00120 return true;
00121 }
00122 const value_type& matched() const
00123 {
00124 return m_matched;
00125 }
00126 void addDelim(const value_type& value)
00127 {
00128 m_delimPred.addDelim(value);
00129 }
00130 void removeDelim(const value_type& value)
00131 {
00132 m_delimPred.removeDelim(value);
00133 }
00134 private:
00135 Iterator m_bit, m_eit, m_tok_eit;
00136 IsDelim<value_type> m_delimPred;
00137 value_type m_matched;
00138 };
00139
00140
00141
00142 template<typename Container>
00143 struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
00144 {
00145 typedef typename Container::value_type value_type;
00146 typedef typename Container::iterator iterator;
00147 typedef typename Container::const_iterator const_iterator;
00148
00149
00150
00151 ContTokenizer(const Container* cont)
00152 : ItTokenizer<const_iterator, value_type>(cont.begin(), cont.end())
00153 {
00154 }
00155 template<typename DelimCont>
00156 ContTokenizer(const Container* cont, const DelimCont& delims)
00157 : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
00158 {
00159 setDelimList(delims);
00160 }
00161 void setSource(const Container* cont)
00162 {
00163 ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
00164 }
00165 private:
00166 ContTokenizer(const ContTokenizer&);
00167 ContTokenizer& operator=(const ContTokenizer&);
00168 };
00169
00170
00171 typedef ContTokenizer<std::string> StringTokenizer;
00172
00173 }
00174
00175 #endif
00176