23 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n")
39 #define GET_VECTOR(fname, conv, sg_type) \ 40 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \ 42 char* buffer = NULL; \ 44 int32_t old_len = num_feat; \ 47 bytes_read = buf->read_line(buffer); \ 61 char* ptr_item=NULL; \ 62 char* ptr_data=buffer; \ 63 DynArray<char*>* items=new DynArray<char*>(); \ 67 if ((*ptr_data=='\n') || \ 68 (ptr_data - buffer >= bytes_read)) \ 73 append_item(items, ptr_data, ptr_item); \ 80 else if (!isblank(*ptr_data) && !ptr_item) \ 84 else if (isblank(*ptr_data) && ptr_item) \ 86 append_item(items, ptr_data, ptr_item); \ 94 SG_DEBUG("num_feat %d\n", num_feat) \ 97 if (old_len < num_feat) \ 98 vector=SG_REALLOC(sg_type, vector, old_len, num_feat); \ 100 for (int32_t i=0; i<num_feat; i++) \ 102 char* item=items->get_element(i); \ 103 vector[i]=conv(item); \ 123 #define GET_FLOAT_VECTOR(sg_type) \ 124 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\ 128 int32_t num_chars = buf->read_line(line); \ 129 int32_t old_len = len; \ 131 if (num_chars == 0) \ 138 substring example_string = {line, line + num_chars}; \ 140 tokenize(m_delimiter, example_string, words); \ 142 len = words.index(); \ 143 substring* feature_start = &words[0]; \ 146 vector = SG_REALLOC(sg_type, vector, old_len, len); \ 149 for (substring* i = feature_start; i != words.end; i++) \ 151 vector[j++] = SGIO::float_of_substring(*i); \ 158 #undef GET_FLOAT_VECTOR 162 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \ 163 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \ 165 char* buffer = NULL; \ 166 ssize_t bytes_read; \ 167 int32_t old_len = num_feat; \ 170 bytes_read = buf->read_line(buffer); \ 184 char* ptr_item=NULL; \ 185 char* ptr_data=buffer; \ 186 DynArray<char*>* items=new DynArray<char*>(); \ 190 if ((*ptr_data=='\n') || \ 191 (ptr_data - buffer >= bytes_read)) \ 196 append_item(items, ptr_data, ptr_item); \ 203 else if (!isblank(*ptr_data) && !ptr_item) \ 207 else 
if (isblank(*ptr_data) && ptr_item) \ 209 append_item(items, ptr_data, ptr_item); \ 217 SG_DEBUG("num_feat %d\n", num_feat) \ 219 label=atof(items->get_element(0)); \ 221 if (old_len < num_feat - 1) \ 222 vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1); \ 224 for (int32_t i=1; i<num_feat; i++) \ 226 char* item=items->get_element(i); \ 227 vector[i-1]=conv(item); \ 246 #undef GET_VECTOR_AND_LABEL 248 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \ 249 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \ 253 int32_t num_chars = buf->read_line(line); \ 254 int32_t old_len = len; \ 256 if (num_chars == 0) \ 263 substring example_string = {line, line + num_chars}; \ 265 tokenize(m_delimiter, example_string, words); \ 267 label = SGIO::float_of_substring(words[0]); \ 269 len = words.index() - 1; \ 270 substring* feature_start = &words[1]; \ 273 vector = SG_REALLOC(sg_type, vector, old_len, len); \ 276 for (substring* i = feature_start; i != words.end; i++) \ 278 vector[j++] = SGIO::float_of_substring(*i); \ 285 #undef GET_FLOAT_VECTOR_AND_LABEL 289 #define GET_STRING(fname, conv, sg_type) \ 290 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \ 292 char* buffer = NULL; \ 293 ssize_t bytes_read; \ 296 bytes_read = buf->read_line(buffer); \ 306 SG_DEBUG("Line read from the file:\n%s\n", buffer) \ 308 if (buffer[bytes_read-1]=='\n') \ 311 buffer[bytes_read-1]='\0'; \ 315 vector=(sg_type *) buffer; \ 336 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \ 337 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \ 339 char* buffer = NULL; \ 340 ssize_t bytes_read; \ 343 bytes_read = buf->read_line(buffer); \ 353 int32_t str_start_pos=-1; \ 355 for (int32_t i=0; i<bytes_read; i++) \ 357 if (buffer[i] == ' ') \ 360 label=atoi(buffer); \ 367 if (str_start_pos == -1) \ 374 if (buffer[bytes_read-1]=='\n') \ 376 buffer[bytes_read-1]='\0'; \ 377 
len=bytes_read-str_start_pos-1; \ 380 len=bytes_read-str_start_pos; \ 382 vector=(sg_type*) &buffer[str_start_pos]; \ 399 #undef GET_STRING_AND_LABEL 403 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \ 404 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \ 406 char* buffer = NULL; \ 407 ssize_t bytes_read; \ 410 bytes_read = buf->read_line(buffer); \ 422 if (buffer[bytes_read-1]=='\n') \ 424 num_chars=bytes_read-1; \ 425 buffer[num_chars]='\0'; \ 428 num_chars=bytes_read; \ 430 int32_t num_dims=0; \ 431 for (int32_t i=0; i<num_chars; i++) \ 433 if (buffer[i]==':') \ 439 int32_t index_start_pos=-1; \ 440 int32_t feature_start_pos; \ 441 int32_t current_feat=0; \ 442 if (len < num_dims) \ 443 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \ 444 for (int32_t i=0; i<num_chars; i++) \ 446 if (buffer[i]==':') \ 449 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \ 451 index_start_pos=-1; \ 453 feature_start_pos=i+1; \ 454 while ((buffer[i]!=' ') && (i<num_chars)) \ 460 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \ 464 else if (buffer[i]==' ') \ 471 if (index_start_pos == -1) \ 493 #undef GET_SPARSE_VECTOR 497 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \ 498 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \ 500 char* buffer = NULL; \ 501 ssize_t bytes_read; \ 504 bytes_read = buf->read_line(buffer); \ 516 if (buffer[bytes_read-1]=='\n') \ 518 num_chars=bytes_read-1; \ 519 buffer[num_chars]='\0'; \ 522 num_chars=bytes_read; \ 524 int32_t num_dims=0; \ 525 for (int32_t i=0; i<num_chars; i++) \ 527 if (buffer[i]==':') \ 533 int32_t index_start_pos=-1; \ 534 int32_t feature_start_pos; \ 535 int32_t current_feat=0; \ 536 int32_t label_pos=-1; \ 537 if (len < num_dims) \ 538 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \ 540 for (int32_t 
i=1; i<num_chars; i++) \ 542 if (buffer[i]==':') \ 546 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) \ 550 label=atof(buffer); \ 556 SG_ERROR("No label found!\n") \ 558 buffer+=label_pos+1; \ 559 num_chars-=label_pos+1; \ 560 for (int32_t i=0; i<num_chars; i++) \ 562 if (buffer[i]==':') \ 565 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \ 567 index_start_pos=-1; \ 569 feature_start_pos=i+1; \ 570 while ((buffer[i]!=' ') && (i<num_chars)) \ 576 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \ 580 else if (buffer[i]==' ') \ 587 if (index_start_pos == -1) \ 609 #undef GET_SPARSE_VECTOR_AND_LABEL 612 void CStreamingAsciiFile::append_item(
613 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
615 REQUIRE(ptr_data && ptr_item,
"Data and Item to append should not be NULL\n");
617 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
618 char* item=SG_MALLOC(
char, len+1);
619 memset(item, 0,
sizeof(
char)*(len+1));
620 item=strncpy(item, ptr_item, len);
622 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, len, item)
628 m_delimiter = delimiter;
633 char *last = s.
start;
636 if (*s.
start == delim)
#define GET_SPARSE_VECTOR(fname, conv, sg_type)
#define GET_STRING(fname, conv, sg_type)
void set_delimiter(char delimiter)
bool append_element(T element)
#define GET_VECTOR(fname, conv, sg_type)
virtual ~CStreamingAsciiFile()
Class v_array taken directly from JL's implementation.
struct Substring, specified by start position and end position.
void push(const T &new_elem)
A Streaming File access class.
#define GET_VECTOR_AND_LABEL(fname, conv, sg_type)
#define GET_FLOAT_VECTOR(sg_type)
Template Dynamic array class that creates an array that can be used like a list or an array...
#define GET_FLOAT_VECTOR_AND_LABEL(sg_type)
All classes and functions are contained in the shogun namespace.
#define GET_STRING_AND_LABEL(fname, conv, sg_type)
#define SG_UNSTABLE(func,...)
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)
bool str_to_bool(char *str)