Alexandria 2.31.0
SDC-CH common library for the Euclid project
Loading...
Searching...
No Matches
Table2Numpy.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2022 Euclid Science Ground Segment
3 *
4 * This library is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU Lesser General Public License as published by the Free
6 * Software Foundation; either version 3.0 of the License, or (at your option)
7 * any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "Pyston/Table2Numpy.h"
21#include <boost/python/list.hpp>
22#include <boost/python/numpy.hpp>
23#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
24#include <boost/python/tuple.hpp>
25
26namespace py = boost::python;
27namespace np = boost::python::numpy;
28
29namespace Pyston {
30
31namespace {
36template <typename T>
37py::tuple getVectorShape(const Euclid::Table::Table& table, size_t idx) {
38 auto& first_row = *table.begin();
39 auto& first_value = boost::get<std::vector<T>>(first_row[idx]);
40 auto size = first_value.size();
41
42 // Make sure all entries have the same shape!
43 for (auto& row : table) {
44 if (size != boost::get<std::vector<T>>(row[idx]).size()) {
45 throw Elements::Exception("All vectors on the column must have the same size");
46 }
47 }
48
49 return py::make_tuple(size);
50}
51
55std::size_t getStringShape(const Euclid::Table::Table& table, size_t idx) {
56 auto& first_row = *table.begin();
57 auto& first_value = boost::get<std::string>(first_row[idx]);
58 auto size = first_value.size();
59
60 // Make sure all entries have the same shape!
61 for (auto& row : table) {
62 if (size != boost::get<std::string>(row[idx]).size()) {
63 throw Elements::Exception("All vectors on the column must have the same size");
64 }
65 }
66
67 return size + 1;
68}
69
74template <typename T>
75py::tuple getNdArrayShape(const Euclid::Table::Table& table, size_t idx) {
76 auto& first_row = *table.begin();
77 auto& first_value = boost::get<Euclid::NdArray::NdArray<T>>(first_row[idx]);
78 auto shape = first_value.shape();
79
80 // Make sure all entries have the same shape!
81 for (auto& row : table) {
82 if (shape != boost::get<Euclid::NdArray::NdArray<T>>(row[idx]).shape()) {
83 throw Elements::Exception("All NdArrays on the column must have the same shape");
84 }
85 }
86
87 // Need to convert the std::vector to a Python tuple
88 py::list pyshape;
89 for (auto d : shape) {
90 pyshape.append(d);
91 }
92 return py::tuple(pyshape);
93}
94
98py::tuple numpyType(const Euclid::Table::Table& table, size_t idx) {
99 auto& descr = table.getColumnInfo()->getDescription(idx);
100 auto& name = descr.name;
101
102 std::type_index type = descr.type;
103
104 if (type == typeid(int32_t)) {
105 return py::make_tuple(name, "i4");
106 } else if (type == typeid(int64_t)) {
107 return py::make_tuple(name, "i8");
108 } else if (type == typeid(float)) {
109 return py::make_tuple(name, "f4");
110 } else if (type == typeid(double)) {
111 return py::make_tuple(name, "f8");
112 } else if (type == typeid(std::string)) {
113 return py::make_tuple(name, "S" + std::to_string(getStringShape(table, idx)));
114 } else if (type == typeid(std::vector<int32_t>)) {
115 return py::make_tuple(name, "i4", getVectorShape<int32_t>(table, idx));
116 } else if (type == typeid(std::vector<int64_t>)) {
117 return py::make_tuple(name, "i8", getVectorShape<int64_t>(table, idx));
118 } else if (type == typeid(std::vector<float>)) {
119 return py::make_tuple(name, "f4", getVectorShape<float>(table, idx));
120 } else if (type == typeid(std::vector<double>)) {
121 return py::make_tuple(name, "f8", getVectorShape<double>(table, idx));
122 } else if (type == typeid(Euclid::NdArray::NdArray<int32_t>)) {
123 return py::make_tuple(name, "i4", getNdArrayShape<int32_t>(table, idx));
124 } else if (type == typeid(Euclid::NdArray::NdArray<int64_t>)) {
125 return py::make_tuple(name, "i8", getNdArrayShape<int64_t>(table, idx));
126 } else if (type == typeid(Euclid::NdArray::NdArray<float>)) {
127 return py::make_tuple(name, "f4", getNdArrayShape<float>(table, idx));
128 } else if (type == typeid(Euclid::NdArray::NdArray<double>)) {
129 return py::make_tuple(name, "f8", getNdArrayShape<double>(table, idx));
130 } else {
131 throw Elements::Exception("Unknown type ") << type.name();
132 }
133}
134
138template <typename T>
140 auto& v = boost::get<std::vector<T>>(cell);
141 return std::make_tuple(sizeof(T) * v.size(), v.data());
142}
143
148 auto& v = boost::get<std::string>(cell);
149 return std::make_tuple(v.size() + 1, v.data());
150}
151
155template <typename T>
157 auto& v = boost::get<Euclid::NdArray::NdArray<T>>(cell);
158 return std::make_tuple(sizeof(T) * v.size(), &(*v.begin()));
159}
160
173off_t copyCell(void* dst, const Euclid::Table::ColumnDescription& descr, const Euclid::Table::Row::cell_type& cell) {
174 std::type_index type = descr.type;
175 off_t data_size = 0;
176 const void* data_ptr;
177
178 if (type == typeid(int32_t)) {
179 data_size = sizeof(int32_t);
180 data_ptr = &boost::get<int32_t>(cell);
181 } else if (type == typeid(int64_t)) {
182 data_size = sizeof(int64_t);
183 data_ptr = &boost::get<int64_t>(cell);
184 } else if (type == typeid(float)) {
185 data_size = sizeof(float);
186 data_ptr = &boost::get<float>(cell);
187 } else if (type == typeid(double)) {
188 data_size = sizeof(double);
189 data_ptr = &boost::get<double>(cell);
190 } else if (type == typeid(std::string)) {
191 std::tie(data_size, data_ptr) = getStringCellData(cell);
192 } else if (type == typeid(std::vector<int32_t>)) {
193 std::tie(data_size, data_ptr) = getVectorCellData<int32_t>(cell);
194 } else if (type == typeid(std::vector<int64_t>)) {
195 std::tie(data_size, data_ptr) = getVectorCellData<int64_t>(cell);
196 } else if (type == typeid(std::vector<float>)) {
197 std::tie(data_size, data_ptr) = getVectorCellData<float>(cell);
198 } else if (type == typeid(std::vector<double>)) {
199 std::tie(data_size, data_ptr) = getVectorCellData<double>(cell);
200 } else if (type == typeid(Euclid::NdArray::NdArray<int32_t>)) {
201 std::tie(data_size, data_ptr) = getNdArrayCellData<int32_t>(cell);
202 } else if (type == typeid(Euclid::NdArray::NdArray<int64_t>)) {
203 std::tie(data_size, data_ptr) = getNdArrayCellData<int64_t>(cell);
204 } else if (type == typeid(Euclid::NdArray::NdArray<float>)) {
205 std::tie(data_size, data_ptr) = getNdArrayCellData<float>(cell);
206 } else if (type == typeid(Euclid::NdArray::NdArray<double>)) {
207 std::tie(data_size, data_ptr) = getNdArrayCellData<double>(cell);
208 } else {
209 throw Elements::Exception("Unknown type ") << type.name();
210 }
211
212 std::memcpy(dst, data_ptr, data_size);
213 return data_size;
214}
215
216} // namespace
217
218boost::python::numpy::ndarray table2numpy(const Euclid::Table::Table& table) {
219 auto colinfo = table.getColumnInfo();
220 size_t ncols = colinfo->size();
221 size_t nrows = table.size();
222
223 py::list cols;
224
225 // Generate the dtypes for numpy
226 for (size_t i = 0; i < ncols; ++i) {
227 auto coldesc = colinfo->getDescription(i);
228 cols.append(numpyType(table, i));
229 }
230
231 // Convert the list of dtypes to an array description
232 np::dtype dtype(cols);
233
234 // Create the numpy array
235 auto array = np::zeros(py::make_tuple(table.size()), dtype);
236
237 // Copy into each row the content from the table
238 char* nd_ptr = array.get_data();
239 for (size_t i = 0; i < nrows; ++i) {
240 const auto& row = table[i];
241 for (size_t j = 0; j < ncols; ++j) {
242 nd_ptr += copyCell(nd_ptr, colinfo->getDescription(j), row[j]);
243 }
244 }
245
246 return array;
247}
248
249} // namespace Pyston
Contains the description of a specific column of a Table.
boost::variant< bool, int32_t, int64_t, float, double, std::string, std::vector< bool >, std::vector< int32_t >, std::vector< int64_t >, std::vector< float >, std::vector< double >, NdArray::NdArray< int32_t >, NdArray::NdArray< int64_t >, NdArray::NdArray< float >, NdArray::NdArray< double > > cell_type
The possible cell types.
Definition Row.h:64
Represents a table.
Definition Table.h:49
std::size_t size() const
Returns the number of rows in the table.
Definition Table.cpp:54
const_iterator begin() const
Returns a const iterator to the first row.
Definition Table.cpp:65
std::shared_ptr< ColumnInfo > getColumnInfo() const
Returns a ColumnInfo object describing the columns of the table.
Definition Table.cpp:50
T make_tuple(T... args)
T memcpy(T... args)
T name(T... args)
boost::python::numpy::ndarray table2numpy(const Euclid::Table::Table &table)
T tie(T... args)
T to_string(T... args)