19 #include <google/protobuf/message.h> 35 CFile(fname, rw, name)
// Sets message_size (which the chunking code divides by the element size to
// obtain the number of elements per message) and allocates the scratch buffer
// that read_message()/write_message() use for serialized data.
// NOTE(review): the listing skips source lines inside this body (46-47), so
// other initialisation may happen here that is not visible.
45 void CProtobufFile::init()
48 message_size=1024*1024;
// The buffer is requested with message_size*sizeof(uint32_t) uint8_t elements;
// presumably SG_MALLOC(type, n) allocates n elements of type -- confirm.
50 buffer=SG_MALLOC(uint8_t, message_size*
sizeof(uint32_t));
// Typed accessor generators. Each GET_*/SET_* macro below expands to one
// CProtobufFile member definition for a given sg_type:
//  - the visible GET_* bodies validate the global header against the expected
//    ShogunVersion data type, read the type-specific header, copy the
//    dimensions out of it and then read the payload;
//  - the visible SET_* bodies write the global header, the type-specific
//    header and then the payload (set_vector/set_matrix first ask
//    compute_num_messages() how many chunks are needed).
// NOTE(review): this listing has source line numbers embedded and several
// lines missing (braces, the GET_NDARRAY body, macro invocations, some
// #undefs), so it does not compile as shown.
// NOTE(review): get_matrix/set_matrix compute num_feat*num_vec in int32_t,
// which can overflow for large matrices before it is widened to uint64_t --
// consider casting one operand first.
53 #define GET_VECTOR(sg_type) \ 54 void CProtobufFile::get_vector(sg_type*& vector, int32_t& len) \ 56 read_and_validate_global_header(ShogunVersion::VECTOR); \ 57 VectorHeader data_header=read_vector_header(); \ 58 len=data_header.len(); \ 59 read_memory_block(vector, len, data_header.num_messages()); \ 76 #define GET_MATRIX(read_func, sg_type) \ 77 void CProtobufFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \ 79 read_and_validate_global_header(ShogunVersion::MATRIX); \ 80 MatrixHeader data_header=read_matrix_header(); \ 81 num_feat=data_header.num_cols(); \ 82 num_vec=data_header.num_rows(); \ 83 read_memory_block(matrix, num_feat*num_vec, data_header.num_messages()); \ 100 #define GET_NDARRAY(read_func, sg_type) \ 101 void CProtobufFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \ 115 #define GET_SPARSE_MATRIX(sg_type) \ 116 void CProtobufFile::get_sparse_matrix( \ 117 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \ 119 read_and_validate_global_header(ShogunVersion::SPARSE_MATRIX); \ 120 SparseMatrixHeader data_header=read_sparse_matrix_header(); \ 121 num_feat=data_header.num_features(); \ 122 num_vec=data_header.num_vectors(); \ 123 read_sparse_matrix(matrix, data_header); \ 139 #undef GET_SPARSE_MATRIX 141 #define SET_VECTOR(sg_type) \ 142 void CProtobufFile::set_vector(const sg_type* vector, int32_t len) \ 144 int32_t num_messages=compute_num_messages(len, sizeof(sg_type)); \ 145 write_global_header(ShogunVersion::VECTOR); \ 146 write_vector_header(len, num_messages); \ 147 write_memory_block(vector, len, num_messages); \ 164 #define SET_MATRIX(sg_type) \ 165 void CProtobufFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \ 167 int32_t num_messages=compute_num_messages(num_feat*num_vec, sizeof(sg_type)); \ 168 write_global_header(ShogunVersion::MATRIX); \ 169 write_matrix_header(num_feat, num_vec, num_messages); \ 170 write_memory_block(matrix, 
num_feat*num_vec, num_messages); \ 187 #define SET_SPARSE_MATRIX(sg_type) \ 188 void CProtobufFile::set_sparse_matrix( \ 189 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 191 write_global_header(ShogunVersion::SPARSE_MATRIX); \ 192 write_sparse_matrix_header(matrix, num_feat, num_vec); \ 193 write_sparse_matrix(matrix, num_vec); \ 209 #undef SET_SPARSE_MATRIX 211 #define GET_STRING_LIST(sg_type) \ 212 void CProtobufFile::get_string_list( \ 213 SGString<sg_type>*& strings, int32_t& num_str, \ 214 int32_t& max_string_len) \ 216 read_and_validate_global_header(ShogunVersion::STRING_LIST); \ 217 StringListHeader data_header=read_string_list_header(); \ 218 num_str=data_header.num_str(); \ 219 max_string_len=data_header.max_string_len(); \ 220 read_string_list(strings, data_header); \ 235 #undef GET_STRING_LIST 237 #define SET_STRING_LIST(sg_type) \ 238 void CProtobufFile::set_string_list( \ 239 const SGString<sg_type>* strings, int32_t num_str) \ 241 write_global_header(ShogunVersion::STRING_LIST); \ 242 write_string_list_header(strings, num_str); \ 243 write_string_list(strings, num_str); \ 258 #undef SET_STRING_LIST 260 void CProtobufFile::write_big_endian_uint(uint32_t number, uint8_t* array, uint32_t size)
// write_big_endian_uint: stores number into array[0..3], most significant
// byte first. SG_ERROR reports an array that is too small; the condition
// guarding that call is among the lines missing from this listing.
263 SG_ERROR(
"array is too small to write\n");
265 array[0]=(number>>24)&0xffu;
266 array[1]=(number>>16)&0xffu;
267 array[2]=(number>>8)&0xffu;
268 array[3]=number&0xffu;
271 uint32_t CProtobufFile::read_big_endian_uint(uint8_t* array, uint32_t size)
274 SG_ERROR(
"array is too small to read\n");
276 return (array[0]<<24) | (array[1]<<16) | (array[2]<<8) | array[3];
// Computes how many messages are needed to carry len elements of sizeof_type
// bytes each: message_size/sizeof_type elements fit into one message, and the
// final if detects a non-empty remainder.
// NOTE(review): the rest of the body (presumably incrementing num_messages
// and returning it) is not visible in this listing -- confirm.
// NOTE(review): len is uint64_t but the quotient is stored in a uint32_t, and
// a sizeof_type larger than message_size would make elements_in_message zero
// (division by zero below).
279 int32_t CProtobufFile::compute_num_messages(uint64_t len, int32_t sizeof_type)
const 281 uint32_t elements_in_message=message_size/sizeof_type;
282 uint32_t num_messages=len/elements_in_message;
283 if (len % elements_in_message > 0)
289 void CProtobufFile::read_and_validate_global_header(ShogunVersion_SGDataType type)
291 ShogunVersion header;
292 read_message(header);
293 REQUIRE(header.version()==version,
"wrong version\n")
294 REQUIRE(header.data_type()==type,
"wrong type\n")
297 void CProtobufFile::write_global_header(ShogunVersion_SGDataType type)
299 ShogunVersion header;
300 header.set_version(version);
301 header.set_data_type(type);
302 write_message(header);
// Reads one VectorHeader message from the file via read_message().
// NOTE(review): the rest of the body (presumably `return data_header;`) is
// not visible in this listing.
305 VectorHeader CProtobufFile::read_vector_header()
307 VectorHeader data_header;
308 read_message(data_header);
// Reads one SparseMatrixHeader message from the file via read_message().
// NOTE(review): the rest of the body (presumably `return data_header;`) is
// not visible in this listing.
313 SparseMatrixHeader CProtobufFile::read_sparse_matrix_header()
315 SparseMatrixHeader data_header;
316 read_message(data_header);
// Reads one MatrixHeader message from the file via read_message().
// NOTE(review): the rest of the body (presumably `return data_header;`) is
// not visible in this listing.
321 MatrixHeader CProtobufFile::read_matrix_header()
323 MatrixHeader data_header;
324 read_message(data_header);
// Reads one StringListHeader message from the file via read_message().
// NOTE(review): the rest of the body (presumably `return data_header;`) is
// not visible in this listing.
329 StringListHeader CProtobufFile::read_string_list_header()
331 StringListHeader data_header;
332 read_message(data_header);
337 void CProtobufFile::write_vector_header(int32_t len, int32_t num_messages)
339 VectorHeader data_header;
340 data_header.set_len(len);
341 data_header.set_num_messages(num_messages);
342 write_message(data_header);
345 void CProtobufFile::write_matrix_header(int32_t num_feat, int32_t num_vec, int32_t num_messages)
347 MatrixHeader data_header;
348 data_header.set_num_cols(num_feat);
349 data_header.set_num_rows(num_vec);
350 data_header.set_num_messages(num_messages);
351 write_message(data_header);
354 #define WRITE_SPARSE_MATRIX_HEADER(sg_type) \ 355 void CProtobufFile::write_sparse_matrix_header( \ 356 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 358 SparseMatrixHeader data_header; \ 359 data_header.set_num_features(num_feat); \ 360 data_header.set_num_vectors(num_vec); \ 361 for (int32_t i=0; i<num_vec; i++) \ 363 data_header.add_num_feat_entries(matrix[i].num_feat_entries); \ 366 write_message(data_header); \ 382 #undef WRITE_SPARSE_MATRIX_HEADER 384 #define WRITE_STRING_LIST_HEADER(sg_type) \ 385 void CProtobufFile::write_string_list_header(const SGString<sg_type>* strings, int32_t num_str) \ 387 int32_t max_string_len=0; \ 388 StringListHeader data_header; \ 389 data_header.set_num_str(num_str); \ 390 for (int32_t i=0; i<num_str; i++) \ 392 data_header.add_str_len(strings[i].slen); \ 393 if (strings[i].slen>max_string_len) \ 394 max_string_len=strings[i].slen; \ 396 data_header.set_max_string_len(max_string_len); \ 397 write_message(data_header); \ 412 #undef WRITE_STRING_LIST_HEADER
/** Reads one length-prefixed protobuf message from the file.
 *
 * The on-disk layout is a 4-byte big-endian size followed by that many bytes
 * of serialized message. The payload is staged in the shared scratch buffer
 * and then parsed into message.
 *
 * Every failure (short read, zero or oversized length, parse failure) is
 * reported through REQUIRE.
 *
 * @param message protobuf message that receives the parsed content
 */
void CProtobufFile::read_message(google::protobuf::Message& message)
{
	// 4-byte big-endian length prefix
	size_t bytes_read=fread(uint_buffer, sizeof(char), sizeof(uint32_t), file);
	REQUIRE(bytes_read==sizeof(uint32_t), "IO error\n");

	const uint32_t msg_size=read_big_endian_uint(uint_buffer, sizeof(uint32_t));
	REQUIRE(msg_size>0, "message size should be more than zero\n");

	// msg_size comes straight from the file, so it must be checked against
	// the scratch buffer before reading into it. init() allocates the buffer
	// with message_size*sizeof(uint32_t) bytes.
	const size_t buffer_capacity=message_size*sizeof(uint32_t);
	REQUIRE(msg_size<=buffer_capacity, "message size exceeds the read buffer\n");

	// serialized payload
	bytes_read=fread(buffer, sizeof(char), msg_size, file);
	REQUIRE(bytes_read==msg_size, "IO error\n");

	REQUIRE(message.ParseFromArray(buffer, msg_size), "cannot parse header\n");
}
433 void CProtobufFile::write_message(
const google::protobuf::Message& message)
435 uint32_t bytes_write=0;
436 uint32_t msg_size=message.ByteSize();
439 write_big_endian_uint(msg_size, uint_buffer,
sizeof(uint32_t));
440 bytes_write=fwrite(uint_buffer,
sizeof(
char),
sizeof(uint32_t),
file);
441 REQUIRE(bytes_write==
sizeof(uint32_t),
"IO error\n");
444 message.SerializeToArray(buffer, msg_size);
445 bytes_write=fwrite(buffer,
sizeof(
char), msg_size,
file);
446 REQUIRE(bytes_write==msg_size,
"IO error\n");
// Chunked payload readers/writers. Each macro expands to one CProtobufFile
// member for a given (chunk_type, sg_type) pair. Payloads are split into
// messages holding at most message_size/sizeof(sg_type) elements:
//  - READ/WRITE_MEMORY_BLOCK move a flat array of len elements, one chunk
//    message per iteration of the outer loop;
//  - READ/WRITE_SPARSE_MATRIX move (feat_index, entry) pairs using a
//    UInt64Chunk for the indices and a chunk_type message for the entries;
//  - READ/WRITE_STRING_LIST move the characters of all strings back to back.
// NOTE(review): this listing has source line numbers embedded and lines
// missing (braces, chunk declarations, else branches, counter updates, macro
// invocations), so it does not compile as shown.
// NOTE(review): in both memory-block macros len is uint64_t, so
// `len-(i+1)*elements_in_message` is evaluated unsigned and `<=0` holds only
// when the difference is exactly zero. For a final, partially filled message
// the subtraction wraps to a huge value and the full elements_in_message
// count is selected, which indexes past the len-element array. Comparing
// `len<=(uint64_t)(i+1)*elements_in_message` avoids this -- confirm and fix
// in the original source.
// NOTE(review): the sparse-matrix and string-list readers fetch a chunk
// before their loops and again whenever buffer_counter reaches
// elements_in_message, while the writers emit the trailing chunk only if
// buffer_counter!=0. This looks like a mismatch for empty payloads or exact
// multiples of elements_in_message -- verify against the missing lines.
449 #define READ_MEMORY_BLOCK(chunk_type, sg_type) \ 450 void CProtobufFile::read_memory_block(sg_type*& vector, uint64_t len, int32_t num_messages) \ 452 vector=SG_MALLOC(sg_type, len); \ 455 int32_t elements_in_message=message_size/sizeof(sg_type); \ 456 for (int32_t i=0; i<num_messages; i++) \ 458 read_message(chunk); \ 460 int32_t num_elements_to_read=0; \ 461 if ((len-(i+1)*elements_in_message)<=0) \ 462 num_elements_to_read=len-i*elements_in_message; \ 464 num_elements_to_read=elements_in_message; \ 466 for (int32_t j=0; j<num_elements_to_read; j++) \ 467 vector[j+i*elements_in_message]=chunk.data(j); \ 483 #undef READ_MEMORY_BLOCK 485 #define WRITE_MEMORY_BLOCK(chunk_type, sg_type) \ 486 void CProtobufFile::write_memory_block(const sg_type* vector, uint64_t len, int32_t num_messages) \ 489 int32_t elements_in_message=message_size/sizeof(sg_type); \ 490 for (int32_t i=0; i<num_messages; i++) \ 493 int32_t num_elements_to_write=0; \ 494 if ((len-(i+1)*elements_in_message)<=0) \ 495 num_elements_to_write=len-i*elements_in_message; \ 497 num_elements_to_write=elements_in_message; \ 499 for (int32_t j=0; j<num_elements_to_write; j++) \ 500 chunk.add_data(vector[j+i*elements_in_message]); \ 502 write_message(chunk); \ 519 #undef WRITE_MEMORY_BLOCK 521 #define READ_SPARSE_MATRIX(chunk_type, sg_type) \ 522 void CProtobufFile::read_sparse_matrix( \ 523 SGSparseVector<sg_type>*& matrix, const SparseMatrixHeader& data_header) \ 525 matrix=SG_MALLOC(SGSparseVector<sg_type>, data_header.num_vectors()); \ 527 UInt64Chunk feat_index_chunk; \ 528 chunk_type entry_chunk; \ 529 read_message(feat_index_chunk); \ 530 read_message(entry_chunk); \ 532 int32_t elements_in_message=message_size/sizeof(sg_type); \ 533 int32_t buffer_counter=0; \ 534 for (uint32_t i=0; i<data_header.num_vectors(); i++) \ 536 matrix[i]=SGSparseVector<sg_type>(data_header.num_feat_entries(i)); \ 537 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \ 539 
matrix[i].features[j].feat_index=feat_index_chunk.data(buffer_counter); \ 540 matrix[i].features[j].entry=entry_chunk.data(buffer_counter); \ 543 if (buffer_counter==elements_in_message) \ 545 read_message(feat_index_chunk); \ 546 read_message(entry_chunk); \ 566 #undef READ_SPARSE_MATRIX 568 #define WRITE_SPARSE_MATRIX(chunk_type, sg_type) \ 569 void CProtobufFile::write_sparse_matrix( \ 570 const SGSparseVector<sg_type>* matrix, int32_t num_vec) \ 572 UInt64Chunk feat_index_chunk; \ 573 chunk_type entry_chunk; \ 574 int32_t elements_in_message=message_size/sizeof(sg_type); \ 575 int32_t buffer_counter=0; \ 576 for (int32_t i=0; i<num_vec; i++) \ 578 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \ 580 feat_index_chunk.add_data(matrix[i].features[j].feat_index); \ 581 entry_chunk.add_data(matrix[i].features[j].entry); \ 584 if (buffer_counter==elements_in_message) \ 586 write_message(feat_index_chunk); \ 587 write_message(entry_chunk); \ 588 feat_index_chunk.Clear(); \ 589 entry_chunk.Clear(); \ 595 if (buffer_counter!=0) \ 597 write_message(feat_index_chunk); \ 598 write_message(entry_chunk); \ 615 #undef WRITE_SPARSE_MATRIX 617 #define READ_STRING_LIST(chunk_type, sg_type) \ 618 void CProtobufFile::read_string_list( \ 619 SGString<sg_type>*& strings, const StringListHeader& data_header) \ 621 strings=SG_MALLOC(SGString<sg_type>, data_header.num_str()); \ 624 read_message(chunk); \ 625 int32_t elements_in_message=message_size/sizeof(sg_type); \ 626 int32_t buffer_counter=0; \ 627 for (uint32_t i=0; i<data_header.num_str(); i++) \ 629 strings[i]=SGString<sg_type>(data_header.str_len(i)); \ 630 for (int32_t j=0; j<strings[i].slen; j++) \ 632 strings[i].string[j]=chunk.data(buffer_counter); \ 635 if (buffer_counter==elements_in_message) \ 637 read_message(chunk); \ 656 #undef READ_STRING_LIST 658 #define WRITE_STRING_LIST(chunk_type, sg_type) \ 659 void CProtobufFile::write_string_list( \ 660 const SGString<sg_type>* strings, int32_t num_str) \ 663 int32_t 
elements_in_message=message_size/sizeof(sg_type); \ 664 int32_t buffer_counter=0; \ 665 for (int32_t i=0; i<num_str; i++) \ 667 for (int32_t j=0; j<strings[i].slen; j++) \ 669 chunk.add_data(strings[i].string[j]); \ 672 if (buffer_counter==elements_in_message) \ 674 write_message(chunk); \ 681 if (buffer_counter!=0) \ 682 write_message(chunk); \ 697 #undef WRITE_STRING_LIST
#define SET_MATRIX(sg_type)
#define GET_MATRIX(read_func, sg_type)
#define GET_STRING_LIST(sg_type)
#define SET_STRING_LIST(sg_type)
#define GET_NDARRAY(read_func, sg_type)
#define GET_SPARSE_MATRIX(sg_type)
#define GET_VECTOR(sg_type)
#define READ_STRING_LIST(chunk_type, sg_type)
#define WRITE_STRING_LIST(chunk_type, sg_type)
#define SET_VECTOR(sg_type)
A File access base class.
#define WRITE_SPARSE_MATRIX(chunk_type, sg_type)
All classes and functions are contained in the shogun namespace.
#define WRITE_SPARSE_MATRIX_HEADER(sg_type)
#define WRITE_STRING_LIST_HEADER(sg_type)
#define WRITE_MEMORY_BLOCK(chunk_type, sg_type)
#define READ_SPARSE_MATRIX(chunk_type, sg_type)
#define READ_MEMORY_BLOCK(chunk_type, sg_type)
#define SET_SPARSE_MATRIX(sg_type)