11 #ifndef _NGRAMTOKENIZER__H__ 12 #define _NGRAMTOKENIZER__H__ 14 #include <shogun/lib/config.h> 20 template <
class T>
class SGVector;
70 virtual const char*
get_name()
const;
The class CNGramTokenizer is used to tokenize an SGVector<char> into n-grams.
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implemen...
virtual void set_text(SGVector< char > txt)
All classes and functions are contained in the shogun namespace.
virtual index_t next_token_idx(index_t &start)
CNGramTokenizer(int32_t ns=3)
virtual ~CNGramTokenizer()
virtual const char * get_name() const
virtual CNGramTokenizer * get_copy()