Alexandria 2.31.0
SDC-CH common library for the Euclid project
Loading...
Searching...
No Matches
NpyCommon.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2012-2021 Euclid Science Ground Segment
3 *
4 * This library is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU Lesser General Public License as published by the Free
6 * Software Foundation; either version 3.0 of the License, or (at your option)
7 * any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
21
22namespace Euclid {
23namespace NdArray {
24
/**
 * Split a single type descriptor into its byte-order flag and its type code.
 *
 * @param descr      Descriptor string. Its first character is taken as the byte-order mark and
 *                   everything after it as the type code.
 * @param big_endian Set to true only when the descriptor starts with '>'; false otherwise.
 * @param dtype      Set to the descriptor without its first character.
 * @throws std::out_of_range if descr is empty (raised by std::string::substr).
 */
void parseSingleValue(const std::string& descr, bool& big_endian, std::string& dtype) {
  // front() on an empty string is undefined behaviour, so guard it explicitly. An empty
  // descriptor is still reported through the std::out_of_range thrown by substr(1) below.
  big_endian = !descr.empty() && descr.front() == '>';
  dtype      = descr.substr(1);
}
29
30inline void parseFieldValues(const std::string& descr, bool& big_endian, std::vector<std::string>& attrs,
31 std::string& dtype) {
32 static const regex::regex field_expr("\\('([^']*)',\\s*'([^']*)'\\)");
33
35 auto start = descr.begin();
36 auto end = descr.end();
37
38 while (regex::regex_search(start, end, match, field_expr)) {
39 bool endian_aux;
40 std::string dtype_aux;
41
42 parseSingleValue(match[2].str(), endian_aux, dtype_aux);
43 if (dtype.empty()) {
44 dtype = dtype_aux;
45 big_endian = endian_aux;
46 } else if (dtype != dtype_aux || big_endian != endian_aux) {
47 throw std::invalid_argument("NdArray only supports uniform types");
48 }
49 attrs.emplace_back(match[1].str());
50
51 start = match[0].second;
52 }
53}
54
55inline void parseNpyDict(const std::string& header, bool& fortran_order, bool& big_endian, std::string& dtype,
56 std::vector<size_t>& shape, std::vector<std::string>& attrs, size_t& n_elements) {
57 auto loc = header.find("fortran_order") + 16;
58 fortran_order = (header.substr(loc, 4) == "True");
59
60 loc = header.find("descr") + 8;
61
62 if (header[loc] == '\'') {
63 auto end = header.find('\'', loc + 1);
64 parseSingleValue(header.substr(loc + 1, end - loc - 1), big_endian, dtype);
65 } else if (header[loc] == '[') {
66 auto end = header.find(']', loc + 1);
67 parseFieldValues(header.substr(loc + 1, end - loc - 1), big_endian, attrs, dtype);
68 } else {
69 throw Elements::Exception() << "Failed to parse the array description: " << header;
70 }
71
72 loc = header.find("shape") + 9;
73 auto loc2 = header.find(')', loc);
74 auto shape_str = header.substr(loc, loc2 - loc);
75 if (shape_str.back() == ',')
76 shape_str.resize(shape_str.size() - 1);
77 shape = stringToVector<size_t>(shape_str);
78 n_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
79}
80
82 size_t& n_elements) {
83 // Magic
84 char magic[6];
85 input.read(magic, sizeof(magic));
86 if (std::memcmp(magic, NPY_MAGIC, sizeof(NPY_MAGIC)) != 0) {
87 throw Elements::Exception() << "Unexpected magic sequence";
88 }
89
90 // Version and header len
91 little_uint32_t header_len;
92 little_uint16_t version;
93 input.read(reinterpret_cast<char*>(&version), sizeof(version));
94 if (version > 30) {
95 throw Elements::Exception() << "Only numpy arrays with version 3 or less are supported";
96 } else if (version.data()[0] == 1) {
97 // 16 bits integer in little endian
98 little_uint16_t aux;
99 input.read(reinterpret_cast<char*>(&aux), sizeof(aux));
100 header_len = aux;
101 } else {
102 // 32 bits integer in little endian
103 input.read(reinterpret_cast<char*>(&header_len), sizeof(header_len));
104 }
105
106 // Read header
107 std::string header(header_len, '\0');
108 input.read(&header[0], header_len);
109
110 // Parse header
111 bool fortran_order, big_endian;
112 parseNpyDict(header, fortran_order, big_endian, dtype, shape, attrs, n_elements);
113
114 if (fortran_order)
115 throw Elements::Exception() << "Fortran order not supported";
116
117 if ((big_endian && (BYTE_ORDER != BIG_ENDIAN)) || (!big_endian && (BYTE_ORDER != LITTLE_ENDIAN)))
118 throw Elements::Exception() << "Only native endianness supported for reading";
119}
120
121} // namespace NdArray
122} // namespace Euclid
T accumulate(T... args)
T begin(T... args)
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T front(T... args)
T memcmp(T... args)
void parseSingleValue(const std::string &descr, bool &big_endian, std::string &dtype)
Definition NpyCommon.cpp:25
void readNpyHeader(std::istream &input, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition NpyCommon.cpp:81
void parseFieldValues(const std::string &descr, bool &big_endian, std::vector< std::string > &attrs, std::string &dtype)
Definition NpyCommon.cpp:30
void parseNpyDict(const std::string &header, bool &fortran_order, bool &big_endian, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition NpyCommon.cpp:55
constexpr const char NPY_MAGIC[]
Definition NpyCommon.h:37
T read(T... args)
T substr(T... args)