libwreport 3.40
input.h
1#ifndef WREPORT_BUFR_INPUT_H
2#define WREPORT_BUFR_INPUT_H
3
#include <functional>
#include <string>
#include <utility>
#include <wreport/bulletin.h>
#include <wreport/error.h>
#include <wreport/var.h>
9
10namespace wreport {
11struct Bulletin;
12
13namespace bulletin {
14struct AssociatedField;
15}
16
17namespace bufr {
18
20{
21 Bulletin& out;
22 unsigned subset_count;
23 DispatchToSubsets(Bulletin& out, unsigned subset_count)
24 : out(out), subset_count(subset_count)
25 {
26 }
27
28 void add_missing(Varinfo info)
29 {
30 for (unsigned i = 0; i < subset_count; ++i)
31 out.subsets[i].store_variable_undef(info);
32 }
33 void add_same(const Var& var)
34 {
35 for (unsigned i = 0; i < subset_count; ++i)
36 out.subsets[i].store_variable(Var(var));
37 }
38 void add_var(unsigned subset, Var&& var)
39 {
40 out.subsets[subset].store_variable(var);
41 }
42};
43
47class Input
48{
49protected:
54 void scan_section_length(unsigned sec_no);
55
56public:
58 const uint8_t* data;
59
61 size_t data_len;
62
70 const char* fname = nullptr;
71
79 size_t start_offset = 0;
80
82 unsigned s4_cursor = 0;
83
85 uint8_t pbyte = 0;
86
88 int pbyte_len = 0;
89
91 unsigned sec[6];
92
99 explicit Input(const std::string& in);
100
109
121 void scan_other_sections(bool has_optional);
122
124 unsigned offset() const { return s4_cursor; }
125
127 unsigned bits_left() const
128 {
129 return static_cast<unsigned>((data_len - s4_cursor) * 8 + pbyte_len);
130 }
131
133 inline unsigned read_byte(unsigned pos) const
134 {
135 return (unsigned)data[pos];
136 }
137
139 inline unsigned read_byte(unsigned section, unsigned pos) const
140 {
141 return (unsigned)data[sec[section] + pos];
142 }
143
145 unsigned read_number(unsigned pos, unsigned byte_len) const
146 {
147 unsigned res = 0;
148 for (unsigned i = 0; i < byte_len; ++i)
149 {
150 res <<= 8;
151 res |= data[pos + i];
152 }
153 return res;
154 }
155
160 inline unsigned read_number(unsigned section, unsigned pos,
161 unsigned byte_len) const
162 {
163 return read_number(sec[section] + pos, byte_len);
164 }
165
170 uint32_t get_bits(unsigned n)
171 {
172 uint32_t result = 0;
173
174 if (s4_cursor == data_len)
176 "end of buffer while looking for %u bits of bit-packed data",
177 n);
178
179 // TODO: review and benchmark and possibly simplify
180 // (a possible alternative approach is to keep a current bitmask that
181 // starts at 0x80 and is shifted right by 1 at each read until it
182 // reaches 0, and get rid of pbyte_len)
183 for (unsigned i = 0; i < n; i++)
184 {
185 if (pbyte_len == 0)
186 {
187 pbyte_len = 8;
188 pbyte = data[s4_cursor++];
189 }
190 result <<= 1;
191 if (pbyte & 0x80)
192 result |= 1;
193 pbyte <<= 1;
194 pbyte_len--;
195 }
196
197 return result;
198 }
199
203 void skip_bits(unsigned n)
204 {
205 if (s4_cursor == data_len)
207 "end of buffer while looking for %u bits of bit-packed data",
208 n);
209
210 for (unsigned i = 0; i < n; i++)
211 {
212 if (pbyte_len == 0)
213 {
214 pbyte_len = 8;
215 pbyte = data[s4_cursor++];
216 }
217 pbyte <<= 1;
218 pbyte_len--;
219 }
220 }
221
223 void debug_dump_next_bits(const char* desc, unsigned count,
224 const std::vector<unsigned>& groups = {}) const;
225
230 void debug_find_sequence(const char* pattern) const;
231
233 void parse_error(const char* fmt, ...) const WREPORT_THROWF_ATTRS(2, 3);
234
236 void parse_error(unsigned pos, const char* fmt, ...) const
238
241 void parse_error(unsigned section, unsigned pos, const char* fmt, ...) const
243
256 void check_available_data(unsigned pos, size_t datalen,
257 const char* expected);
258
273 void check_available_message_data(unsigned section, unsigned pos,
274 size_t datalen, const char* expected);
275
290 void check_available_section_data(unsigned section, unsigned pos,
291 size_t datalen, const char* expected);
292
305 void decode_compressed_number(Var& dest, uint32_t base, unsigned diffbits);
306
315 void decode_number(Var& dest);
316
320 bool decode_compressed_base(Varinfo info, uint32_t& base,
321 uint32_t& diffbits);
322
327 void decode_compressed_number(Varinfo info, unsigned subsets,
328 std::function<void(unsigned, Var&&)> dest);
329
330 void decode_string(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
331
332 void decode_compressed_number(Varinfo info, unsigned subsets,
333 DispatchToSubsets& dest);
334
340 const bulletin::AssociatedField& afield,
341 unsigned subsets,
342 std::function<void(unsigned, Var&&)> dest);
343
355 void decode_compressed_semantic_number(Var& dest, unsigned subsets);
356
373 bool decode_string(unsigned bit_len, char* str, size_t& len);
374
386 void decode_string(Var& dest);
387
399 void decode_string(Var& dest, unsigned subsets);
400
405 void decode_string(Varinfo info, unsigned subsets,
406 std::function<void(unsigned, Var&&)> dest);
407
419 void decode_binary(Var& dest);
420
428 std::string decode_uncompressed_bitmap(unsigned size);
429
443 std::string decode_compressed_bitmap(unsigned size);
444};
445
446} // namespace bufr
447} // namespace wreport
448#endif
Storage for the decoded data of a BUFR or CREX message.
Definition bulletin.h:30
std::vector< Subset > subsets
Decoded variables.
Definition bulletin.h:124
A physical variable.
Definition var.h:25
Binary buffer with bit-level read operations.
Definition input.h:48
size_t data_len
Input buffer size.
Definition input.h:61
unsigned read_byte(unsigned pos) const
Read a byte value at offset pos.
Definition input.h:133
Input(const std::string &in)
Wrap a string into an Input.
uint32_t get_bits(unsigned n)
Get the integer value of the next 'n' bits from the decode input. n must be <= 32.
Definition input.h:170
void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the given section in the input buffer contains at least datalen characters after offset po...
void decode_compressed_number(Var &dest, uint32_t base, unsigned diffbits)
Decode a compressed number as described by dest.info(), and set it as value for dest.
void debug_dump_next_bits(const char *desc, unsigned count, const std::vector< unsigned > &groups={}) const
Dump to stderr 'count' bits of 'buf', starting at the 'ofs-th' bit.
uint8_t pbyte
Byte we are currently decoding.
Definition input.h:85
std::string decode_uncompressed_bitmap(unsigned size)
Decode an uncompressed bitmap of size bits.
void scan_other_sections(bool has_optional)
Scan the message filling in the sec[] array of section start offsets of all sections from 2 on.
unsigned bits_left() const
Return the number of bits left in the message to be decoded.
Definition input.h:127
void scan_lead_sections()
Scan the message filling in the sec[] array of start offsets of sections 0 and 1.
void scan_section_length(unsigned sec_no)
Scan length of section sec_no, filling in the start of the next section in sec[sec_no + 1].
unsigned offset() const
Return the current decoding byte offset.
Definition input.h:124
unsigned s4_cursor
Offset of the byte we are currently decoding.
Definition input.h:82
void check_available_data(unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos; throw error_parse ...
unsigned read_number(unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos.
Definition input.h:145
void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField &afield, unsigned subsets, std::function< void(unsigned, Var &&)> dest)
Decode a number as described by info from a compressed bufr with subsets subsets, and send the result...
void debug_find_sequence(const char *pattern) const
Match the given pattern as regexp on the still unread input bitstream, with bits converted to a strin...
unsigned sec[6]
Offsets of the start of BUFR sections.
Definition input.h:91
size_t start_offset
File offset of the start of the message.
Definition input.h:79
void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos in section section;...
unsigned read_byte(unsigned section, unsigned pos) const
Read a byte value at offset pos inside section section.
Definition input.h:139
unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos inside section section.
Definition input.h:160
const uint8_t * data
Input buffer.
Definition input.h:58
void skip_bits(unsigned n)
Skip the next n bits.
Definition input.h:203
const char * fname
Input file name (optional).
Definition input.h:70
void decode_binary(Var &dest)
Decode a generic binary value as-is, as described by dest.info(), and set it as value for dest.
bool decode_compressed_base(Varinfo info, uint32_t &base, uint32_t &diffbits)
Decode the base value for a variable in a compressed BUFR.
std::string decode_compressed_bitmap(unsigned size)
Decode a "compressed" bitmap of size bits.
void decode_compressed_semantic_number(Var &dest, unsigned subsets)
Decode a number as described by dest.info(), and set it as value for dest.
int pbyte_len
Bits left in pbyte to decode.
Definition input.h:88
void parse_error(const char *fmt,...) const WREPORT_THROWF_ATTRS(2, 3)
Throw an error_parse at the current decoding location.
void decode_number(Var &dest)
Decode a number as described by dest.info(), and set it as value for dest.
wreport exceptions.
#define WREPORT_THROWF_ATTRS(a, b)
Tell the compiler that a function always throws and expects printf-style arguments.
Definition error.h:56
String functions.
Definition benchmark.h:13
Information about a variable.
Definition varinfo.h:140
Definition associated_fields.h:13