21 init(file, is_labelled, size);
29 if (parser.is_running())
41 num_symbols=alphabet->get_num_symbols();
51 num_symbols=alphabet->get_num_symbols();
58 alpha_ascii=
new CAlphabet(ascii_alphabet);
66 alpha_ascii=
new CAlphabet(ascii_alphabet);
94 return current_length;
104 parser.set_read_vector_and_label
108 #define GET_FEATURE_TYPE(f_type, sg_type) \ 109 template<> EFeatureType CStreamingStringFeatures<sg_type>::get_feature_type() const \ 127 #undef GET_FEATURE_TYPE 131 void CStreamingStringFeatures<T>::init()
134 alphabet=
new CAlphabet();
138 current_sgstring.string=current_string;
139 current_sgstring.slen=current_length;
145 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
150 has_labels=is_labelled;
152 parser.init(file, is_labelled, size);
153 parser.set_free_vector_after_release(
false);
154 parser.set_free_vectors_on_destruct(
false);
161 alpha_ascii=alphabet;
163 if (!parser.is_running())
164 parser.start_parser();
178 ret_value = (bool) parser.get_next_example(current_string,
188 alpha_ascii->add_string_to_histogram(current_string, current_length);
190 for (i=0; i<current_length; i++)
191 current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
192 alpha_bin->add_string_to_histogram(current_string, current_length);
196 alpha_ascii->add_string_to_histogram(current_string, current_length);
200 if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
202 SG_ERROR(
"StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n")
211 alphabet=alpha_ascii;
214 num_symbols=alphabet->get_num_symbols();
222 current_sgstring.string=current_string;
223 current_sgstring.slen=current_length;
225 return current_sgstring;
233 return current_label;
239 parser.finalize_example();
245 return current_length;
void use_alphabet(EAlphabet alpha)
virtual void get_string(bool *&vector, int32_t &len)
CStreamingStringFeatures()
virtual float64_t get_label()
virtual int32_t get_num_vectors() const
EAlphabet
Alphabet of charfeatures/observations.
SGString< T > get_vector()
virtual int32_t get_num_features()
The class Alphabet implements an alphabet and alphabet utility functions.
virtual EFeatureClass get_feature_class() const
EFeatureClass
shogun feature class
A Streaming File access class.
virtual int32_t get_vector_length()
virtual void get_string_and_label(bool *&vector, int32_t &len, float64_t &label)
virtual void end_parser()
bool remap_to_bin
Whether remapping must be done.
floatmax_t get_num_symbols()
This class implements streaming features as strings.
virtual bool get_next_example()
virtual void start_parser()
All classes and functions are contained in the shogun namespace.
virtual void set_vector_reader()
void set_read_functions()
#define GET_FEATURE_TYPE(f_type, sg_type)
virtual ~CStreamingStringFeatures()
Streaming features are features which are used for online algorithms.
virtual void set_vector_and_label_reader()
virtual void release_example()
void set_remap(CAlphabet *ascii_alphabet, CAlphabet *binary_alphabet)
CAlphabet * get_alphabet()