8#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_PARSER_HPP
13#include "sax_parser_base.hpp"
100 (void)val; (void)transient;
132template<
typename HandlerT,
typename ConfigT = sax_parser_default_config>
136 typedef HandlerT handler_type;
137 typedef ConfigT config_type;
139 sax_parser(std::string_view content, handler_type& handler);
140 ~sax_parser() =
default;
153 void element_open(std::ptrdiff_t begin_pos);
154 void element_close(std::ptrdiff_t begin_pos);
156 void declaration(
const char* name_check);
163 handler_type& m_handler;
166template<
typename HandlerT,
typename ConfigT>
167sax_parser<HandlerT,ConfigT>::sax_parser(std::string_view content, handler_type& handler) :
173template<
typename HandlerT,
typename ConfigT>
174void sax_parser<HandlerT,ConfigT>::parse()
179 skip_space_and_control();
182 assert(m_buffer_pos == 0);
185template<
typename HandlerT,
typename ConfigT>
186void sax_parser<HandlerT,ConfigT>::header()
194 skip_space_and_control();
196 if (!has_char() || cur_char() !=
'<')
199 if (config_type::baseline_version >= 11)
203 if (next_char_checked() !=
'?')
210template<
typename HandlerT,
typename ConfigT>
211void sax_parser<HandlerT,ConfigT>::body()
215 if (cur_char() ==
'<')
218 if (!m_root_elem_open)
222 else if (m_nest_level)
230template<
typename HandlerT,
typename ConfigT>
231void sax_parser<HandlerT,ConfigT>::element()
233 assert(cur_char() ==
'<');
234 std::ptrdiff_t pos = offset();
235 char c = next_char_checked();
245 declaration(
nullptr);
252template<
typename HandlerT,
typename ConfigT>
253void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
256 element_name(elem, begin_pos);
260 skip_space_and_control();
261 char c = cur_char_checked();
265 if (next_and_char() !=
'>')
268 elem.end_pos = offset();
269 m_handler.start_element(elem);
271 m_handler.end_element(elem);
273 m_root_elem_open =
false;
274#if ORCUS_DEBUG_SAX_PARSER
275 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
283 elem.end_pos = offset();
285 m_handler.start_element(elem);
287#if ORCUS_DEBUG_SAX_PARSER
288 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
297template<
typename HandlerT,
typename ConfigT>
298void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
300 assert(cur_char() ==
'/');
304 element_name(elem, begin_pos);
306 if (cur_char() !=
'>')
309 elem.end_pos = offset();
311 m_handler.end_element(elem);
312#if ORCUS_DEBUG_SAX_PARSER
313 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
316 m_root_elem_open =
false;
319template<
typename HandlerT,
typename ConfigT>
320void sax_parser<HandlerT,ConfigT>::special_tag()
322 assert(cur_char() ==
'!');
324 size_t len = available_size();
328 switch (next_and_char())
333 if (next_and_char() !=
'-')
347 expects_next(
"CDATA[", 6);
355 expects_next(
"OCTYPE", 6);
356 skip_space_and_control();
366template<
typename HandlerT,
typename ConfigT>
367void sax_parser<HandlerT,ConfigT>::declaration(
const char* name_check)
369 assert(cur_char() ==
'?');
373 std::string_view decl_name;
375#if ORCUS_DEBUG_SAX_PARSER
376 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
379 if (name_check && decl_name != name_check)
381 std::ostringstream os;
382 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
386 m_handler.start_declaration(decl_name);
387 skip_space_and_control();
390 while (cur_char_checked() !=
'?')
393 skip_space_and_control();
395 if (next_char_checked() !=
'>')
398 m_handler.end_declaration(decl_name);
401#if ORCUS_DEBUG_SAX_PARSER
402 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
406template<
typename HandlerT,
typename ConfigT>
407void sax_parser<HandlerT,ConfigT>::cdata()
409 size_t len = available_size();
413 const char* p0 = mp_char;
414 size_t i = 0, match = 0;
415 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
429 else if (c ==
'>' && match == 2)
432 size_t cdata_len = i - 2;
433 m_handler.characters(std::string_view(p0, cdata_len),
false);
443template<
typename HandlerT,
typename ConfigT>
444void sax_parser<HandlerT,ConfigT>::doctype()
448 name(param.root_element);
449 skip_space_and_control();
452 size_t len = available_size();
456 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
460 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
463 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
467 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
472 skip_space_and_control();
475 value(param.fpi,
false);
477 has_char_throw(
"DOCTYPE section too short.");
478 skip_space_and_control();
479 has_char_throw(
"DOCTYPE section too short.");
481 if (cur_char() ==
'>')
484#if ORCUS_DEBUG_SAX_PARSER
485 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
487 m_handler.doctype(param);
493 value(param.uri,
false);
495 has_char_throw(
"DOCTYPE section too short.");
496 skip_space_and_control();
497 has_char_throw(
"DOCTYPE section too short.");
499 if (cur_char() !=
'>')
500 throw malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
502#if ORCUS_DEBUG_SAX_PARSER
503 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
505 m_handler.doctype(param);
509template<
typename HandlerT,
typename ConfigT>
510void sax_parser<HandlerT,ConfigT>::characters()
512 const char* p0 = mp_char;
513 for (; has_char(); next())
515 if (cur_char() ==
'<')
518 if (cur_char() ==
'&')
523 buf.append(p0, mp_char-p0);
524 characters_with_encoded_char(buf);
526 m_handler.characters(std::string_view{},
false);
528 m_handler.characters(buf.str(),
true);
535 std::string_view val(p0, mp_char-p0);
536 m_handler.characters(val,
false);
540template<
typename HandlerT,
typename ConfigT>
541void sax_parser<HandlerT,ConfigT>::attribute()
544 attribute_name(attr.ns, attr.name);
546#if ORCUS_DEBUG_SAX_PARSER
547 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
550 skip_space_and_control();
552 char c = cur_char_checked();
555 std::ostringstream os;
556 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
561 skip_space_and_control();
563 attr.transient = value(attr.value,
true);
568#if ORCUS_DEBUG_SAX_PARSER
569 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
572 m_handler.attribute(attr);
Definition cell_buffer.hpp:22
Definition parser_base.hpp:23
Definition sax_parser_base.hpp:108
Definition sax_parser.hpp:30
void end_declaration(std::string_view decl)
Definition sax_parser.hpp:59
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition sax_parser.hpp:37
void attribute(const orcus::sax::parser_attribute &attr)
Definition sax_parser.hpp:111
void characters(std::string_view val, bool transient)
Definition sax_parser.hpp:98
void start_declaration(std::string_view decl)
Definition sax_parser.hpp:49
void end_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:79
void start_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:69
Definition sax_parser_base.hpp:37
Definition sax_parser_base.hpp:96
Definition sax_parser_base.hpp:77
Definition sax_parser.hpp:20
static constexpr uint8_t baseline_version
Definition sax_parser.hpp:26