19 #ifndef ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
20 #define ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
23 #include <boost/endian/arithmetic.hpp>
24 #include <boost/filesystem/operations.hpp>
25 #include <boost/iostreams/device/mapped_file.hpp>
26 #include <boost/regex.hpp>
31 using boost::endian::little_uint16_t;
32 using boost::endian::little_uint32_t;
// Magic prefix of the file format: the byte 0x93 followed by the ASCII letters "NUMPY".
// The array is brace-initialised from six characters, so sizeof(NPY_MAGIC) is 6 and there
// is no terminating NUL. That size is one of the terms of the total header length that is
// computed when a header is written.
37 constexpr
const char NPY_MAGIC[] = {
'\x93',
'N',
'U',
'M',
'P',
'Y'};
// Compile-time selection on the host byte order. The bodies of both branches are not
// visible in this fragment; presumably they define ENDIAN_MARKER for the host — TODO confirm.
42 #if BYTE_ORDER == LITTLE_ENDIAN
44 #elif BYTE_ORDER == BIG_ENDIAN
// Presumably inside an #else branch that is not shown: any byte order other than the two
// tested above stops the build.
47 #error "PDP_ENDIAN not supported"
// Each `str` below is a static member of an enclosing type whose declaration is not visible
// in this fragment. The values look like NumPy dtype codes (a kind character, optionally
// followed by the item size in bytes); which C++ type each one is attached to must be
// checked against the enclosing declarations — TODO confirm.

// presumably the signed 8-bit integer code — confirm
58 static constexpr
const char* str =
"b";
// presumably signed integer, 2 bytes — confirm
63 static constexpr
const char* str =
"i2";
// presumably signed integer, 4 bytes — confirm
68 static constexpr
const char* str =
"i4";
// presumably signed integer, 8 bytes — confirm
73 static constexpr
const char* str =
"i8";
// presumably the unsigned 8-bit integer code — confirm
78 static constexpr
const char* str =
"B";
// presumably unsigned integer, 2 bytes — confirm
83 static constexpr
const char* str =
"u2";
// presumably unsigned integer, 4 bytes — confirm
88 static constexpr
const char* str =
"u4";
// presumably unsigned integer, 8 bytes — confirm
93 static constexpr
const char* str =
"u8";
// presumably floating point, 4 bytes — confirm
98 static constexpr
const char* str =
"f4";
// presumably floating point, 8 bytes — confirm
103 static constexpr
const char* str =
"f8";
// big_endian becomes true only when the first character of the descriptor is '>';
// any other first character yields false.
// NOTE(review): front() on an empty std::string is undefined behaviour and no guard is
// visible in this fragment — confirm descr can never be empty here.
110 big_endian = (descr.
front() ==
'>');
// Pattern for one field written as ('name', 'type'): capture group 1 is the text between
// the first pair of quotes, capture group 2 the text between the second pair. Whitespace
// is allowed after the comma. Being a function-local static, the regex is built once.
123 static const boost::regex field_expr(
"\\('([^']*)',\\s*'([^']*)'\\)");
125 boost::match_results<std::string::const_iterator> match;
// [start, end) is the part of descr that has not been searched yet.
126 auto start = descr.
begin();
127 auto end = descr.
end();
// One iteration per field found in the remaining range.
129 while (boost::regex_search(start, end, match, field_expr)) {
// (Lines not shown here compute dtype_aux / endian_aux for the current field and open the
// first branch.) In that first branch the byte order of the field is recorded.
136 big_endian = endian_aux;
137 }
// Reached when the current field's type or byte order differs from the values already
// recorded. The branch body is not visible — presumably it reports an error, since only
// one dtype / big_endian pair is returned to the caller. TODO confirm.
else if (dtype != dtype_aux || big_endian != endian_aux) {
// Continue searching right after the text consumed by the current match.
142 start = match[0].second;
// The header dictionary is parsed by locating each key and skipping a fixed number of
// characters to reach its value.
// NOTE(review): none of the find() results in the visible lines is compared against
// std::string::npos, so a header missing one of the keys would produce a bogus offset.
// Verify whether the caller guarantees a well-formed header.

// "fortran_order" is 13 characters; +16 skips three more, which assumes the key is
// followed by exactly `': ` (closing quote, colon, one space).
165 auto loc = header.
find(
"fortran_order") + 16;
// Four characters are compared, so only the literal text True sets the flag.
166 fortran_order = (header.
substr(loc, 4) ==
"True");
// "descr" is 5 characters; +8 again assumes the three-character separator `': `.
168 loc = header.
find(
"descr") + 8;
// The descriptor value is either a quoted string or a bracketed list.
170 if (header[loc] ==
'\'') {
// Position of the closing quote of the string value.
171 auto end = header.
find(
'\'', loc + 1);
173 }
else if (header[loc] ==
'[') {
// Position of the first ']' after the opening bracket.
174 auto end = header.
find(
']', loc + 1);
// "shape" is 5 characters; +9 assumes the key is followed by `': (`, so loc lands on the
// first character after the opening parenthesis.
180 loc = header.
find(
"shape") + 9;
181 auto loc2 = header.
find(
')', loc);
// Text between the parentheses of the shape tuple.
182 auto shape_str = header.
substr(loc, loc2 - loc);
// A one-element tuple is written with a trailing comma; drop it before parsing.
// NOTE(review): if the tuple is empty, shape_str is empty and back() is undefined
// behaviour — confirm whether zero-dimensional arrays can reach this code.
183 if (shape_str.back() ==
',')
184 shape_str.resize(shape_str.size() - 1);
185 shape = stringToVector<size_t>(shape_str);
204 size_t& n_elements) {
// NOTE(review): no stream-state check is visible after any of the read() calls in this
// fragment; confirm that short reads are detected in the lines not shown.

// Read as many bytes as the local `magic` buffer holds (its declaration is not shown).
207 input.
read(magic,
sizeof(magic));
// Both fields use boost::endian little-endian types, so the raw bytes read from the stream
// are interpreted as little-endian regardless of the host.
213 little_uint32_t header_len;
214 little_uint16_t version;
215 input.
read(
reinterpret_cast<char*
>(&version),
sizeof(version));
218 }
// data()[0] is the first of the two version bytes. When it is 1, the header length is read
// through `aux` (declared in a line not shown; presumably a 16-bit little-endian value).
else if (version.data()[0] == 1) {
221 input.
read(
reinterpret_cast<char*
>(&aux),
sizeof(aux));
// Presumably in the remaining branch: the header length is read directly as four bytes.
225 input.
read(
reinterpret_cast<char*
>(&header_len),
sizeof(header_len));
// Read header_len bytes of header text into `header` (its allocation is not shown).
230 input.
read(&header[0], header_len);
233 bool fortran_order, big_endian;
234 parseNpyDict(header, fortran_order, big_endian, dtype, shape, attrs, n_elements);
// True when the byte order declared in the file differs from the byte order of the host.
// The statement controlled by this condition is not visible in this fragment.
239 if ((big_endian && (BYTE_ORDER != BIG_ENDIAN)) || (!big_endian && (BYTE_ORDER != LITTLE_ENDIAN)))
// Every dimension is written followed by a comma, so the last element also gets a
// trailing comma. Whatever is written before and after the loop is not visible here.
256 for (
auto s : shape) {
257 shape_stream <<
s <<
',';
// The accumulated text is returned as a std::string.
260 return shape_stream.
str();
// One entry per attribute name, written as ('<attr>', '<ENDIAN_MARKER><type>') followed by
// a comma and a space. Every attribute gets the same type string and byte-order marker.
269 for (
auto& attr : attrs) {
270 dtype <<
"('" << attr <<
"', '" <<
ENDIAN_MARKER << type <<
"'), ";
// The function signature that follows this template header is not visible in this fragment.
280 template <
typename T>
// Taken when attribute names were supplied; the branch body is not shown.
282 if (!attrs.
empty()) {
// Length of the header text built so far.
293 auto header_str = header.
str();
294 little_uint32_t header_len = header_str.size();
// Bytes occupied by everything up to the end of the header text, plus one extra byte
// (presumably reserved for a terminating newline — TODO confirm).
297 size_t total_length =
sizeof(
NPY_MAGIC) +
sizeof(
NPY_VERSION) +
sizeof(header_len) + header_len + 1;
// Number of bytes needed to reach the next multiple of 64.
// NOTE(review): when total_length is already a multiple of 64 this evaluates to 64, not 0,
// so a full extra block of padding would be added — confirm that is intended.
298 size_t padding = 64 - total_length % 64;
// The header text and its length are fetched again, after the lines not shown here have
// (presumably) appended the padding.
303 header_str = header.
str();
304 header_len = header_str.size();
// The length is emitted as the raw four bytes of a little_uint32_t.
311 out.
write(
reinterpret_cast<char*
>(&header_len),
sizeof(header_len));
// Followed by the header text itself.
314 out.
write(header_str.data(), header_str.size());
// The declaration that follows this template header is not visible in this fragment.
323 template <
typename T>
// Text and length of the regenerated header.
348 auto header_str = header.
str();
349 auto header_size = header_str.size();
// Message of a check whose condition is not visible; by its wording it is raised when the
// regenerated header no longer fits the space reserved for it.
353 "The new header length must match the allocated space.";
// File size needed for the header plus m_n_elements values of type T.
// NOTE(review): the multiplication is not guarded against overflow in the visible lines.
357 size_t new_size = header_size +
sizeof(T) *
m_n_elements;
// Resize the file at m_path to that size.
361 boost::filesystem::resize_file(
m_path, new_size);
MappedContainer(const boost::filesystem::path &path, size_t data_offset, size_t n_elements, const std::vector< std::string > &attr_names, boost::iostreams::mapped_file &&input, size_t max_size)
boost::filesystem::path m_path
std::vector< std::string > m_attr_names
void resize(const std::vector< size_t > &shape)
boost::iostreams::mapped_file m_mapped
T emplace_back(T... args)
Elements::Path::Item path
std::string typeDescription(const std::string &type, const std::vector< std::string > &attrs)
void writeNpyHeader(std::ostream &out, std::vector< size_t > shape, const std::vector< std::string > &attrs)
void parseSingleValue(const std::string &descr, bool &big_endian, std::string &dtype)
std::string npyShape(std::vector< size_t > shape)
void readNpyHeader(std::istream &input, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
void parseFieldValues(const std::string &descr, bool &big_endian, std::vector< std::string > &attrs, std::string &dtype)
constexpr const uint8_t NPY_VERSION[]
constexpr const char * ENDIAN_MARKER
void parseNpyDict(const std::string &header, bool &fortran_order, bool &big_endian, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
constexpr const char NPY_MAGIC[]