Alexandria 2.31.0
SDC-CH common library for the Euclid project
Loading...
Searching...
No Matches
AsciiReader.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2012-2022 Euclid Science Ground Segment
3 *
4 * This library is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU Lesser General Public License as published by the Free
6 * Software Foundation; either version 3.0 of the License, or (at your option)
7 * any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
25#include <boost/algorithm/string.hpp>
26#include <fstream>
27#include <set>
28
29#if BOOST_VERSION < 107300
30#include <boost/io/detail/quoted_manip.hpp>
31#else
32#include <boost/io/quoted.hpp>
33#endif
34
37#include "Table/AsciiReader.h"
38
39#include "AsciiReaderHelper.h"
40#include "ReaderHelper.h"
41
42namespace Euclid {
43namespace Table {
44
46 std::string line;
47 auto pos = in.tellg();
48 getline(in, line);
49 in.seekg(pos);
50 return line;
51}
52
54
55AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {}
56
58 : m_stream_holder(std::move(stream_holder)) {}
59
62 throw Elements::Exception() << "Changing comment indicator after reading "
63 << "has started is not allowed";
64 }
65 if (indicator.empty()) {
66 throw Elements::Exception() << "Empty string as comment indicator";
67 }
68 m_comment = indicator;
69 return *this;
70}
71
74 throw Elements::Exception() << "Fixing the column names after reading "
75 << "has started is not allowed";
76 }
77
78 m_column_names = std::move(column_names);
79
81 static const regex::regex vertical_whitespace{".*[\\n\\v\\f\\r].*"}; // Matches a name that contains a vertical whitespace
82 // character (newline, vertical tab, form feed or carriage return); horizontal whitespace is not matched
83 for (const auto& name : m_column_names) {
84 if (name.empty()) {
85 throw Elements::Exception() << "Empty string column names are not allowed";
86 }
87 if (regex_match(name, vertical_whitespace)) {
88 throw Elements::Exception() << "Column name '" << name << "' contains "
89 << "vertical whitespace characters";
90 }
91 if (!set.insert(name).second) { // Check for duplicate names
92 throw Elements::Exception() << "Duplicate column name " << name;
93 }
94 }
96 throw Elements::Exception() << "Different number of column names and types";
97 }
98
99 return *this;
100}
101
// The column types may only be fixed while m_reading_started is still false; the flag is set to true
// elsewhere in this file before the stream is read.
103 if (m_reading_started) {
104 throw Elements::Exception() << "Fixing the column types after reading "
105 << "has started is not allowed";
106 }
107
    // Every requested type is stored as a (type, size) pair with the size set to 0.
    // NOTE(review): std::back_inserter appends to m_column_types and no reset of the vector is visible
    // in this listing (the line preceding this block is not shown), so a second call would presumably
    // accumulate entries instead of replacing them - confirm whether the vector should be cleared first.
108 std::transform(column_types.begin(), column_types.end(), std::back_inserter(m_column_types),
109 [](std::type_index type) { return std::make_pair(type, std::size_t(0)); });
110
112 throw Elements::Exception() << "Different number of column names and types";
113 }
114
    // Returns the reader itself.
115 return *this;
116}
117
119 if (m_reading_started) {
120 throw Elements::Exception() << "Fixing the column types after reading "
121 << "has started is not allowed";
122 }
123
124 m_column_types = std::move(column_types);
125
127 throw Elements::Exception() << "Different number of column names and types";
128 }
129
130 return *this;
131}
132
134 if (m_column_info != nullptr) {
135 return;
136 }
137 m_reading_started = true;
138
139 auto& in = m_stream_holder->ref();
140
141 size_t columns_number = countColumns(in, m_comment);
142 if (!m_column_names.empty() && m_column_names.size() != columns_number) {
143 throw Elements::Exception() << "Columns number in stream (" << columns_number
144 << ") does not match the column names number (" << m_column_names.size() << ")";
145 }
146 if (!m_column_types.empty() && m_column_types.size() != columns_number) {
147 throw Elements::Exception() << "Columns number in stream (" << columns_number
148 << ") does not match the column types number (" << m_column_types.size() << ")";
149 }
150
151 auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
152 auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
153
157 std::vector<std::string> descriptions;
158 auto first_line = firstDataLine(in, m_comment);
159
160 for (size_t i = 0; i < columns_number; ++i) {
161 if (m_column_names.empty()) {
162 names.emplace_back(auto_names[i]);
163 } else {
165 }
166 auto info = auto_desc.find(auto_names[i]);
167 if (info != auto_desc.end()) {
168 if (m_column_types.empty()) {
169 types.emplace_back(info->second.type, info->second.size);
170 } else {
172 }
173 units.emplace_back(info->second.unit);
174 descriptions.emplace_back(info->second.description);
175 } else {
176 if (!m_column_types.empty()) {
178 } else if (i < first_line.size()) {
179 types.emplace_back(guessColumnType(first_line[i]));
180 } else {
181 types.emplace_back(typeid(std::string), 0);
182 }
183 units.emplace_back("");
184 descriptions.emplace_back("");
185 }
186 }
187 m_column_info = createColumnInfo(names, types, units, descriptions);
188}
189
194
196 std::ostringstream comment;
197
198 m_reading_started = true;
199 auto& in = m_stream_holder->ref();
200 while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
201 std::string line;
202 getline(in, line);
203 line = line.substr(m_comment.size());
204 boost::trim(line);
205 comment << line << '\n';
206 }
207
208 auto full_comment = comment.str();
209 boost::trim(full_comment);
210 return full_comment;
211}
212
215 auto& in = m_stream_holder->ref();
216
217 std::vector<Row> row_list;
    // NOTE(review): `rows` is tested in the loop condition, but no decrement of it is visible inside
    // this loop in the listing (compare the `--rows` in skip()), so a positive limit would not stop the
    // loop before the stream ends - confirm against the full source.
218 while (in && rows != 0) {
219 std::string line;
220 getline(in, line);
221 auto tokens = splitLine(line, m_comment);
    // Lines for which splitLine() returns no tokens do not produce a row.
222 if (tokens.empty()) {
223 continue;
224 }
    // A data line must provide exactly one token per column of m_column_info.
225 if (tokens.size() != m_column_info->size()) {
226 throw Elements::Exception() << "Line with wrong number of cells: " << line;
227 }
228
230 values.reserve(tokens.size());
    // `index` advances once per token, so token i is converted using the type recorded in the
    // description of column i.
231 std::size_t index = 0;
232 std::transform(tokens.begin(), tokens.end(), std::back_inserter(values), [this, &index](const std::string& token) {
233 return convertToCellType(token, m_column_info->getDescription(index++).type);
234 });
235 row_list.push_back(Row{std::move(values), m_column_info});
236 }
237
    // Ending the loop without a single row is reported as an error instead of returning an empty Table.
238 if (row_list.empty()) {
239 throw Elements::Exception() << "No more table rows left";
240 }
241 return Table{std::move(row_list)};
242}
243
// Consumes lines from the stream until `rows` lines with content have been passed or the stream stops
// being readable. A line counts only if it is non-empty after removing everything from the first
// occurrence of the comment indicator and trimming whitespace. With a negative `rows` the counter moves
// away from zero, so the loop ends only when the stream is no longer readable.
244void AsciiReader::skip(long rows) {
246 auto& in = m_stream_holder->ref();
247
248 while (in && rows != 0) {
249 std::string line;
250 getline(in, line);
    // Drop the comment part of the line, if there is one.
251 size_t comment_pos = line.find(m_comment);
252 if (comment_pos != std::string::npos) {
253 line = line.substr(0, comment_pos);
254 }
255 boost::trim(line);
    // Blank and comment-only lines are consumed without being counted.
256 if (!line.empty()) {
257 --rows;
258 }
259 }
260}
261
265
269
270} // namespace Table
271} // namespace Euclid
T back_inserter(T... args)
T begin(T... args)
TableReader implementation for reading ASCII tables from streams.
Definition AsciiReader.h:87
void skip(long rows) override
Implements the TableReader::skip() contract.
std::string getComment() override
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
std::vector< std::string > m_column_names
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
Table readImpl(long rows) override
Reads the next rows into a Table.
std::vector< std::pair< std::type_index, std::size_t > > m_column_types
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
std::shared_ptr< ColumnInfo > m_column_info
const ColumnInfo & getInfo() override
Returns the column information of the table.
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Provides information about the columns of a Table.
Definition ColumnInfo.h:52
Represents one row of a Table.
Definition Row.h:57
Represents a table.
Definition Table.h:49
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T move(T... args)
std::vector< std::string > splitLine(std::string line, const std::string &comment)
bool hasNextRow(std::istream &in, const std::string &comment)
static std::string _peekLine(std::istream &in)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace-separated tokens of the first non-commented line.
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
std::pair< std::type_index, std::size_t > guessColumnType(const std::string &token)
std::vector< std::string > firstDataLine(std::istream &in, const std::string &comment)
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::pair< std::type_index, std::size_t > > &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
STL namespace.
T push_back(T... args)
T reserve(T... args)
T seekg(T... args)
T size(T... args)
T str(T... args)
T substr(T... args)
T tellg(T... args)
T transform(T... args)