26#include "ElementsKernel/Exception.h"
27#include "ElementsKernel/Logging.h"
29#include <boost/algorithm/string.hpp>
30#include <boost/lexical_cast.hpp>
31#include <boost/spirit/include/qi.hpp>
32#include <boost/tokenizer.hpp>
51 size_t comment_pos = line.
find(comment);
52 if (comment_pos != std::string::npos) {
53 line = line.
substr(0, comment_pos);
59 line_stream >> boost::io::quoted(token);
61 line_stream >> boost::io::quoted(token);
77 {
"bool",
typeid(bool)},
78 {
"boolean",
typeid(bool)},
80 {
"int",
typeid(int32_t)},
81 {
"long",
typeid(int64_t)},
82 {
"int32",
typeid(int32_t)},
83 {
"int64",
typeid(int64_t)},
85 {
"float",
typeid(float)},
86 {
"double",
typeid(double)},
88 {
"string",
typeid(std::string)},
90 {
"[bool]",
typeid(std::vector<bool>)},
91 {
"[boolean]",
typeid(std::vector<bool>)},
92 {
"[int]",
typeid(std::vector<int32_t>)},
93 {
"[long]",
typeid(std::vector<int64_t>)},
94 {
"[int32]",
typeid(std::vector<int32_t>)},
95 {
"[int64]",
typeid(std::vector<int64_t>)},
96 {
"[float]",
typeid(std::vector<float>)},
97 {
"[double]",
typeid(std::vector<double>)},
126 if (boost::starts_with(line, comment)) {
129 boost::replace_all(line, comment,
"");
131 if (boost::starts_with(line,
"Column:")) {
138 line_stream >> boost::io::quoted(name);
139 if (descriptions.
count(name) != 0) {
142 line_stream >> boost::io::quoted(token);
144 if (line_stream && !boost::starts_with(token,
"(") && token !=
"-") {
146 line_stream >> boost::io::quoted(token);
149 if (line_stream && boost::starts_with(token,
"(")) {
153 line_stream >> boost::io::quoted(token);
155 if (line_stream && token ==
"-") {
156 line_stream >> boost::io::quoted(token);
159 while (line_stream) {
160 desc << token <<
' ';
161 line_stream >> boost::io::quoted(token);
164 boost::trim(desc_str);
191 if (boost::starts_with(line, comment)) {
194 boost::replace_all(line, comment,
"");
199 if (boost::starts_with(line,
"Column:")) {
203 auto space_i = temp.
find(
' ');
205 temp = temp.
substr(0, space_i);
215 if (!last_comment.
empty()) {
218 line_stream >> boost::io::quoted(token);
219 while (line_stream) {
221 line_stream >> boost::io::quoted(token);
223 if (names.
size() != columns_number) {
230 if (!desc_names.
empty() && desc_names.
size() != columns_number) {
231 logger.warn() <<
"Number of column descriptions does not matches the number"
232 <<
" of the columns";
237 if (names.
size() < columns_number) {
238 for (
size_t i = names.
size() + 1; i <= columns_number; ++i) {
244 for (
const auto& name : names) {
245 if (!set.
insert(name).second) {
257 boost::char_separator<char> sep{
","};
258 boost::tokenizer<boost::char_separator<char>> tok{str, sep};
260 [](
const std::string& s) { return boost::get<T>(convertToCellType(s, typeid(T))); });
265NdArray<T> convertStringToNdArray(
const std::string& str) {
267 throw Elements::Exception() <<
"Cannot convert an empty string to a NdArray";
268 }
else if (str[0] !=
'<') {
269 throw Elements::Exception() <<
"Unexpected initial character for a NdArray: " << str[0];
272 auto closing_char = str.
find(
'>');
273 if (closing_char == std::string::npos) {
274 throw Elements::Exception() <<
"Could not find '>'";
277 auto shape_str = str.
substr(1, closing_char - 1);
278 auto shape_i = convertStringToVector<int32_t>(shape_str);
279 auto data = convertStringToVector<T>(str.
substr(closing_char + 1));
281 std::vector<size_t> shape_u;
292 if (value ==
"true" || value ==
"t" || value ==
"yes" || value ==
"y" || value ==
"1") {
294 }
else if (value ==
"false" || value ==
"f" || value ==
"no" || value ==
"n" || value ==
"0") {
300 {
typeid(int32_t), boost::lexical_cast<int32_t, const std::string&>},
301 {
typeid(
int64_t), boost::lexical_cast<int64_t, const std::string&>},
303 {
typeid(float), boost::lexical_cast<float, const std::string&>},
304 {
typeid(double), boost::lexical_cast<double, const std::string&>},
306 {
typeid(
std::string), boost::lexical_cast<std::string, const std::string&>},
326 return i->second(value);
327 }
catch (boost::bad_lexical_cast
const&) {
337 size_t comment_pos = line.
find(comment);
338 if (comment_pos != std::string::npos) {
339 line = line.
substr(0, comment_pos);
355 size_t comment_pos = line.
find(comment);
356 if (comment_pos != std::string::npos) {
357 line = line.
substr(0, comment_pos);
369 size_t comment_pos = line.
find(comment);
371 if (comment_pos != std::string::npos) {
372 line = line.
substr(0, comment_pos);
379 line_stream >> boost::io::quoted(token);
380 while (line_stream) {
382 line_stream >> boost::io::quoted(token);
392 while (in && boost::starts_with(line, comment)) {
399 namespace qi = boost::spirit::qi;
403 auto it1 = token.
begin();
405 if (qi::parse(it1, token.
end(), qi::long_, l) && it1 == token.
end()) {
408 if (qi::parse(it2, token.
end(), qi::double_, d) && it2 == token.
end()) {
409 return {
typeid(double), 0};
T back_inserter(T... args)
static Logging getLogger(const std::string &name="")
NdArray(std::vector< size_t > shape_)
boost::variant< bool, int32_t, int64_t, float, double, std::string, std::vector< bool >, std::vector< int32_t >, std::vector< int64_t >, std::vector< float >, std::vector< double >, NdArray::NdArray< int32_t >, NdArray::NdArray< int64_t >, NdArray::NdArray< float >, NdArray::NdArray< double > > cell_type
The possible cell types.
This class gets a stream as argument during construction and when it is deleted it sets the position ...
T emplace_back(T... args)
T forward_as_tuple(T... args)
std::vector< std::string > splitLine(std::string line, const std::string &comment)
std::type_index keywordToType(const std::string &keyword)
bool hasNextRow(std::istream &in, const std::string &comment)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
std::pair< std::type_index, std::size_t > guessColumnType(const std::string &token)
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
std::vector< std::string > firstDataLine(std::istream &in, const std::string &comment)
const std::vector< std::pair< std::string, std::type_index > > KeywordTypeMap
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
const std::map< std::type_index, std::function< Row::cell_type(const std::string &)> > sCellConverter
static Elements::Logging logger