Alexandria 2.31.0
SDC-CH common library for the Euclid project
Loading...
Searching...
No Matches
NpyCommon.h
Go to the documentation of this file.
/*
 * Copyright (C) 2012-2022 Euclid Science Ground Segment
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3.0 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#ifndef ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
20#define ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
21
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <istream>
#include <numeric>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/endian/arithmetic.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
27
28namespace Euclid {
29namespace NdArray {
30
31using boost::endian::little_uint16_t;
32using boost::endian::little_uint32_t;
33
/**
 * Magic string that opens an NPY file: the byte 0x93 followed by "NUMPY".
 * The array is deliberately not null-terminated, so sizeof(NPY_MAGIC) is exactly
 * the number of bytes (6) that must be written.
 */
constexpr const char NPY_MAGIC[] = {'\x93', 'N', 'U', 'M', 'P', 'Y'};
38
/**
 * Byte-order prefix used when building the 'descr' entry of the NPY header:
 * "<" on little-endian hosts, ">" on big-endian hosts.
 *
 * Inside #if an identifier that is not defined evaluates to 0, so a bare
 * `BYTE_ORDER == LITTLE_ENDIAN` silently selects little-endian whenever no
 * previously included header happens to define those macros. Every macro is
 * therefore tested with defined(), the compiler built-ins are preferred, and an
 * unknown byte order is a hard error instead of a silent guess.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
constexpr const char* ENDIAN_MARKER = "<";
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
constexpr const char* ENDIAN_MARKER = ">";
#elif defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)
constexpr const char* ENDIAN_MARKER = "<";
#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
constexpr const char* ENDIAN_MARKER = ">";
#elif defined(_WIN32)
// Windows only runs on little-endian targets and defines none of the macros above
constexpr const char* ENDIAN_MARKER = "<";
#else
#error "Unknown or unsupported byte order (PDP_ENDIAN is not supported)"
#endif
49
/**
 * Compile-time map from a C++ arithmetic type to the NumPy type code used in the
 * 'descr' entry of an NPY header (without the byte-order prefix).
 *
 * The primary template is intentionally empty: using NpyDtype<T>::str with a type
 * that has no specialization below fails to compile.
 */
template <typename T>
struct NpyDtype {};

/// Signed 8-bit integer
template <>
struct NpyDtype<int8_t> {
  static constexpr const char* str = "b";
};

/// Signed 16-bit integer
template <>
struct NpyDtype<int16_t> {
  static constexpr const char* str = "i2";
};

/// Signed 32-bit integer
template <>
struct NpyDtype<int32_t> {
  static constexpr const char* str = "i4";
};

/// Signed 64-bit integer
template <>
struct NpyDtype<int64_t> {
  static constexpr const char* str = "i8";
};

/// Unsigned 8-bit integer
template <>
struct NpyDtype<uint8_t> {
  static constexpr const char* str = "B";
};

/// Unsigned 16-bit integer
template <>
struct NpyDtype<uint16_t> {
  static constexpr const char* str = "u2";
};

/// Unsigned 32-bit integer
template <>
struct NpyDtype<uint32_t> {
  static constexpr const char* str = "u4";
};

/// Unsigned 64-bit integer
template <>
struct NpyDtype<uint64_t> {
  static constexpr const char* str = "u8";
};

/// Single precision floating point
template <>
struct NpyDtype<float> {
  static constexpr const char* str = "f4";
};

/// Double precision floating point
template <>
struct NpyDtype<double> {
  static constexpr const char* str = "f8";
};
105
/**
 * Parse a type description made of a single value. Implemented in NpyCommon.cpp.
 * NOTE(review): parameter roles below are inferred from their names and constness;
 * confirm against the implementation.
 * @param descr       Type description string to parse (input)
 * @param big_endian  Set by the function (output)
 * @param dtype       Set by the function (output)
 */
void parseSingleValue(const std::string& descr, bool& big_endian, std::string& dtype);

/**
 * Parse a type description made of a list of fields. Implemented in NpyCommon.cpp.
 * NOTE(review): parameter roles below are inferred from their names and constness;
 * confirm against the implementation.
 * @param descr       Type description string to parse (input)
 * @param big_endian  Set by the function (output)
 * @param attrs       Filled by the function, presumably with the field names (output)
 * @param dtype       Set by the function (output)
 */
void parseFieldValues(const std::string& descr, bool& big_endian, std::vector<std::string>& attrs, std::string& dtype);

/**
 * Parse the dictionary stored in an NPY header. Implemented in NpyCommon.cpp.
 * NOTE(review): parameter roles below are inferred from their names and constness;
 * confirm against the implementation.
 * @param header         Header text to parse (input)
 * @param fortran_order  Set by the function (output)
 * @param big_endian     Set by the function (output)
 * @param dtype          Set by the function (output)
 * @param shape          Filled by the function (output)
 * @param attrs          Filled by the function (output)
 * @param n_elements     Set by the function (output)
 */
void parseNpyDict(const std::string& header, bool& fortran_order, bool& big_endian, std::string& dtype,
                  std::vector<size_t>& shape, std::vector<std::string>& attrs, size_t& n_elements);

/**
 * Read an NPY header from a stream. Implemented in NpyCommon.cpp.
 * NOTE(review): parameter roles below are inferred from their names and constness;
 * confirm against the implementation.
 * @param input       Stream to read from
 * @param dtype       Set by the function (output)
 * @param shape       Filled by the function (output)
 * @param attrs       Filled by the function (output)
 * @param n_elements  Set by the function (output)
 */
void readNpyHeader(std::istream& input, std::string& dtype, std::vector<size_t>& shape,
                   std::vector<std::string>& attrs, size_t& n_elements);
157
/**
 * Format version bytes written right after the magic string: major 2, minor 0.
 */
constexpr const uint8_t NPY_VERSION[] = {'\x02', '\x00'};
162
/**
 * Serialize a shape as a Python tuple literal.
 * Every dimension is followed by a comma, so {2, 3} becomes "(2,3,)", a single
 * dimension {5} becomes the one-element tuple "(5,)" and an empty shape gives "()".
 * @param shape  Size of each axis
 * @return The textual tuple
 */
inline std::string npyShape(std::vector<size_t> shape) {
  std::stringstream shape_stream;
  shape_stream << "(";
  for (auto s : shape) {
    shape_stream << s << ',';
  }
  shape_stream << ")";
  return shape_stream.str();
}
177
179 std::stringstream dtype;
180 if (attrs.empty()) {
181 dtype << '\'' << ENDIAN_MARKER << type << '\'';
182 } else {
183 dtype << '[';
184 for (auto& attr : attrs) {
185 dtype << "('" << attr << "', '" << ENDIAN_MARKER << type << "'), ";
186 }
187 dtype << ']';
188 }
189 return dtype.str();
190}
191
195template <typename T>
197 if (!attrs.empty()) {
198 if (attrs.size() != shape.back()) {
199 throw std::out_of_range("Last axis does not match number of attribute names");
200 }
201 shape.pop_back();
202 }
203 // Serialize header as a Python dict
204 std::stringstream header;
205 header << "{"
206 << "'descr': " << typeDescription(NpyDtype<T>::str, attrs)
207 << ", 'fortran_order': False, 'shape': " << npyShape(shape) << "}";
208 auto header_str = header.str();
209 little_uint32_t header_len = header_str.size();
210
211 // Pad header with spaces so the header block is 64 bytes aligned
212 size_t total_length = sizeof(NPY_MAGIC) + sizeof(NPY_VERSION) + sizeof(header_len) + header_len + 1; // Keep 1 for \n
213 if (total_length % 64 > 0) {
214 size_t padding = 64 - total_length % 64;
215 header << std::string(padding, '\x20');
216 }
217 header << '\n';
218 header_str = header.str();
219 header_len = header_str.size();
220
221 // Magic and version
222 out.write(NPY_MAGIC, sizeof(NPY_MAGIC));
223 out.write(reinterpret_cast<const char*>(&NPY_VERSION), sizeof(NPY_VERSION));
224
225 // HEADER_LEN
226 out.write(reinterpret_cast<char*>(&header_len), sizeof(header_len));
227
228 // HEADER
229 out.write(header_str.data(), header_str.size());
230}
231
238template <typename T>
240public:
241 MappedContainer(const boost::filesystem::path& path, size_t data_offset, size_t n_elements,
242 const std::vector<std::string>& attr_names, boost::iostreams::mapped_file&& input, size_t max_size)
243 : m_path(path)
244 , m_data_offset(data_offset)
245 , m_n_elements(n_elements)
246 , m_max_size(max_size)
247 , m_attr_names(attr_names)
248 , m_mapped(std::move(input))
249 , m_data(reinterpret_cast<T*>(const_cast<char*>(m_mapped.const_data()) + data_offset)) {}
250
251 size_t size() const {
252 return m_n_elements;
253 }
254
255 size_t nbytes() const {
256 return m_max_size;
257 }
258
259 T* data() {
260 return m_data;
261 }
262
263 void resize(const std::vector<size_t>& shape) {
264 // Generate header
265 std::stringstream header;
266 writeNpyHeader<T>(header, shape, m_attr_names);
267 auto header_str = header.str();
268 auto header_size = header_str.size();
269 // Make sure we are in place
270 if (header_size != m_data_offset) {
271 throw Elements::Exception() << "Can not resize memory mapped NPY file. "
272 "The new header length must match the allocated space.";
273 }
274
276 size_t new_size = header_size + sizeof(T) * m_n_elements;
277 if (new_size > m_max_size) {
278 throw Elements::Exception() << "resize request bigger than maximum allocated size: " << new_size << " > "
279 << m_max_size;
280 }
281 boost::filesystem::resize_file(m_path, new_size);
282 std::copy(header_str.begin(), header_str.end(), m_mapped.data());
283 }
284
285private:
286 boost::filesystem::path m_path;
289 boost::iostreams::mapped_file m_mapped;
291};
292
293} // end of namespace NdArray
294} // end of namespace Euclid
295
296#endif // ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
T accumulate(T... args)
T back(T... args)
T begin(T... args)
MappedContainer(const boost::filesystem::path &path, size_t data_offset, size_t n_elements, const std::vector< std::string > &attr_names, boost::iostreams::mapped_file &&input, size_t max_size)
Definition NpyCommon.h:241
boost::filesystem::path m_path
Definition NpyCommon.h:286
std::vector< std::string > m_attr_names
Definition NpyCommon.h:288
void resize(const std::vector< size_t > &shape)
Definition NpyCommon.h:263
boost::iostreams::mapped_file m_mapped
Definition NpyCommon.h:289
T copy(T... args)
T empty(T... args)
T end(T... args)
std::string typeDescription(const std::string &type, const std::vector< std::string > &attrs)
Definition NpyCommon.h:178
void writeNpyHeader(std::ostream &out, std::vector< size_t > shape, const std::vector< std::string > &attrs)
Definition NpyCommon.h:196
void parseSingleValue(const std::string &descr, bool &big_endian, std::string &dtype)
Definition NpyCommon.cpp:25
std::string npyShape(std::vector< size_t > shape)
Definition NpyCommon.h:168
void readNpyHeader(std::istream &input, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition NpyCommon.cpp:81
void parseFieldValues(const std::string &descr, bool &big_endian, std::vector< std::string > &attrs, std::string &dtype)
Definition NpyCommon.cpp:30
constexpr const uint8_t NPY_VERSION[]
Definition NpyCommon.h:161
constexpr const char * ENDIAN_MARKER
Definition NpyCommon.h:43
void parseNpyDict(const std::string &header, bool &fortran_order, bool &big_endian, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition NpyCommon.cpp:55
constexpr const char NPY_MAGIC[]
Definition NpyCommon.h:37
STL namespace.
T pop_back(T... args)
T size(T... args)
T str(T... args)
T write(T... args)