11 #ifndef _DELIMITERTOKENIZER__H__ 12 #define _DELIMITERTOKENIZER__H__ 14 #include <shogun/lib/config.h> 75 virtual const char*
get_name()
const;
virtual index_t next_token_idx(index_t &start)
bool get_skip_delimiters() const
void set_skip_delimiters(bool skip_delimiters)
CDelimiterTokenizer * get_copy()
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implemen...
CDelimiterTokenizer(bool skip_delimiters=false)
All classes and functions are contained in the shogun namespace.
The class CDelimiterTokenizer is used to tokenize a SGVector<char> into tokens using custom chars as ...
SGVector< bool > delimiters
void init_for_whitespace()
virtual void set_text(SGVector< char > txt)
bool skip_consecutive_delimiters
virtual const char * get_name() const
virtual ~CDelimiterTokenizer()