8#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_PARSER_HPP
11#include "sax_parser_base.hpp"
98 (void)val; (void)transient;
130template<
typename HandlerT,
typename ConfigT = sax_parser_default_config>
134 typedef HandlerT handler_type;
135 typedef ConfigT config_type;
137 sax_parser(std::string_view content, handler_type& handler);
138 ~sax_parser() =
default;
151 void element_open(std::ptrdiff_t begin_pos);
152 void element_close(std::ptrdiff_t begin_pos);
154 void declaration(
const char* name_check);
161 handler_type& m_handler;
164template<
typename HandlerT,
typename ConfigT>
165sax_parser<HandlerT,ConfigT>::sax_parser(std::string_view content, handler_type& handler) :
171template<
typename HandlerT,
typename ConfigT>
172void sax_parser<HandlerT,ConfigT>::parse()
177 skip_space_and_control();
180 assert(m_buffer_pos == 0);
183template<
typename HandlerT,
typename ConfigT>
184void sax_parser<HandlerT,ConfigT>::header()
192 skip_space_and_control();
194 if (!has_char() || cur_char() !=
'<')
197 if (config_type::baseline_version >= 11)
201 if (next_char_checked() !=
'?')
208template<
typename HandlerT,
typename ConfigT>
209void sax_parser<HandlerT,ConfigT>::body()
213 if (cur_char() ==
'<')
216 if (!m_root_elem_open)
220 else if (m_nest_level)
228template<
typename HandlerT,
typename ConfigT>
229void sax_parser<HandlerT,ConfigT>::element()
231 assert(cur_char() ==
'<');
232 std::ptrdiff_t pos = offset();
233 char c = next_char_checked();
243 declaration(
nullptr);
250template<
typename HandlerT,
typename ConfigT>
251void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
254 element_name(elem, begin_pos);
258 skip_space_and_control();
259 char c = cur_char_checked();
263 if (next_and_char() !=
'>')
266 elem.end_pos = offset();
267 m_handler.start_element(elem);
269 m_handler.end_element(elem);
271 m_root_elem_open =
false;
272#if ORCUS_DEBUG_SAX_PARSER
273 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
281 elem.end_pos = offset();
283 m_handler.start_element(elem);
285#if ORCUS_DEBUG_SAX_PARSER
286 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
295template<
typename HandlerT,
typename ConfigT>
296void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
298 assert(cur_char() ==
'/');
302 element_name(elem, begin_pos);
304 if (cur_char() !=
'>')
307 elem.end_pos = offset();
309 m_handler.end_element(elem);
310#if ORCUS_DEBUG_SAX_PARSER
311 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
314 m_root_elem_open =
false;
317template<
typename HandlerT,
typename ConfigT>
318void sax_parser<HandlerT,ConfigT>::special_tag()
320 assert(cur_char() ==
'!');
322 size_t len = available_size();
326 switch (next_and_char())
331 if (next_and_char() !=
'-')
345 expects_next(
"CDATA[", 6);
353 expects_next(
"OCTYPE", 6);
354 skip_space_and_control();
364template<
typename HandlerT,
typename ConfigT>
365void sax_parser<HandlerT,ConfigT>::declaration(
const char* name_check)
367 assert(cur_char() ==
'?');
371 std::string_view decl_name;
373#if ORCUS_DEBUG_SAX_PARSER
374 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
377 if (name_check && decl_name != name_check)
379 std::ostringstream os;
380 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
384 m_handler.start_declaration(decl_name);
385 skip_space_and_control();
388 while (cur_char_checked() !=
'?')
391 skip_space_and_control();
393 if (next_char_checked() !=
'>')
396 m_handler.end_declaration(decl_name);
399#if ORCUS_DEBUG_SAX_PARSER
400 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
404template<
typename HandlerT,
typename ConfigT>
405void sax_parser<HandlerT,ConfigT>::cdata()
407 size_t len = available_size();
411 const char* p0 = mp_char;
412 size_t i = 0, match = 0;
413 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
427 else if (c ==
'>' && match == 2)
430 size_t cdata_len = i - 2;
431 m_handler.characters(std::string_view(p0, cdata_len),
false);
441template<
typename HandlerT,
typename ConfigT>
442void sax_parser<HandlerT,ConfigT>::doctype()
446 name(param.root_element);
447 skip_space_and_control();
450 size_t len = available_size();
454 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
458 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
461 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
465 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
470 skip_space_and_control();
473 value(param.fpi,
false);
475 has_char_throw(
"DOCTYPE section too short.");
476 skip_space_and_control();
477 has_char_throw(
"DOCTYPE section too short.");
479 if (cur_char() ==
'>')
482#if ORCUS_DEBUG_SAX_PARSER
483 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
485 m_handler.doctype(param);
491 value(param.uri,
false);
493 has_char_throw(
"DOCTYPE section too short.");
494 skip_space_and_control();
495 has_char_throw(
"DOCTYPE section too short.");
497 if (cur_char() !=
'>')
498 throw malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
500#if ORCUS_DEBUG_SAX_PARSER
501 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
503 m_handler.doctype(param);
507template<
typename HandlerT,
typename ConfigT>
508void sax_parser<HandlerT,ConfigT>::characters()
510 const char* p0 = mp_char;
511 for (; has_char(); next())
513 if (cur_char() ==
'<')
516 if (cur_char() ==
'&')
521 buf.append(p0, mp_char-p0);
522 characters_with_encoded_char(buf);
524 m_handler.characters(std::string_view{},
false);
526 m_handler.characters(buf.str(),
true);
533 std::string_view val(p0, mp_char-p0);
534 m_handler.characters(val,
false);
538template<
typename HandlerT,
typename ConfigT>
539void sax_parser<HandlerT,ConfigT>::attribute()
542 attribute_name(attr.ns, attr.name);
544#if ORCUS_DEBUG_SAX_PARSER
545 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
548 skip_space_and_control();
550 char c = cur_char_checked();
553 std::ostringstream os;
554 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
559 skip_space_and_control();
561 attr.transient = value(attr.value,
true);
566#if ORCUS_DEBUG_SAX_PARSER
567 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
570 m_handler.attribute(attr);
Definition cell_buffer.hpp:22
Definition parser_base.hpp:23
Definition sax_parser_base.hpp:108
Definition sax_parser.hpp:28
void end_declaration(std::string_view decl)
Definition sax_parser.hpp:57
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition sax_parser.hpp:35
void attribute(const orcus::sax::parser_attribute &attr)
Definition sax_parser.hpp:109
void characters(std::string_view val, bool transient)
Definition sax_parser.hpp:96
void start_declaration(std::string_view decl)
Definition sax_parser.hpp:47
void end_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:77
void start_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:67
Definition sax_parser_base.hpp:37
Definition sax_parser_base.hpp:96
Definition sax_parser_base.hpp:77
Definition sax_parser.hpp:18
static constexpr uint8_t baseline_version
Definition sax_parser.hpp:24