10#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
11#define INCLUDED_ORCUS_SAX_PARSER_HPP
13#include "sax_parser_base.hpp"
101 (void)val; (void)transient;
133template<
typename HandlerT,
typename ConfigT = sax_parser_default_config>
137 typedef HandlerT handler_type;
138 typedef ConfigT config_type;
140 sax_parser(std::string_view content, handler_type& handler);
141 ~sax_parser() =
default;
154 void element_open(std::ptrdiff_t begin_pos);
155 void element_close(std::ptrdiff_t begin_pos);
157 void declaration(
const char* name_check);
164 handler_type& m_handler;
167template<
typename HandlerT,
typename ConfigT>
168sax_parser<HandlerT,ConfigT>::sax_parser(std::string_view content, handler_type& handler) :
174template<
typename HandlerT,
typename ConfigT>
175void sax_parser<HandlerT,ConfigT>::parse()
180 skip_space_and_control();
183 assert(m_buffer_pos == 0);
186template<
typename HandlerT,
typename ConfigT>
187void sax_parser<HandlerT,ConfigT>::header()
195 skip_space_and_control();
197 if (!has_char() || cur_char() !=
'<')
200 if (config_type::baseline_version >= 11)
204 if (next_char_checked() !=
'?')
211template<
typename HandlerT,
typename ConfigT>
212void sax_parser<HandlerT,ConfigT>::body()
216 if (cur_char() ==
'<')
219 if (!m_root_elem_open)
223 else if (m_nest_level)
231template<
typename HandlerT,
typename ConfigT>
232void sax_parser<HandlerT,ConfigT>::element()
234 assert(cur_char() ==
'<');
235 std::ptrdiff_t pos = offset();
236 char c = next_char_checked();
246 declaration(
nullptr);
253template<
typename HandlerT,
typename ConfigT>
254void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
257 element_name(elem, begin_pos);
261 skip_space_and_control();
262 char c = cur_char_checked();
266 if (next_and_char() !=
'>')
269 elem.end_pos = offset();
270 m_handler.start_element(elem);
272 m_handler.end_element(elem);
274 m_root_elem_open =
false;
275#if ORCUS_DEBUG_SAX_PARSER
276 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
284 elem.end_pos = offset();
286 m_handler.start_element(elem);
288#if ORCUS_DEBUG_SAX_PARSER
289 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
298template<
typename HandlerT,
typename ConfigT>
299void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
301 assert(cur_char() ==
'/');
305 element_name(elem, begin_pos);
307 if (cur_char() !=
'>')
310 elem.end_pos = offset();
312 m_handler.end_element(elem);
313#if ORCUS_DEBUG_SAX_PARSER
314 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
317 m_root_elem_open =
false;
320template<
typename HandlerT,
typename ConfigT>
321void sax_parser<HandlerT,ConfigT>::special_tag()
323 assert(cur_char() ==
'!');
325 size_t len = available_size();
329 switch (next_and_char())
334 if (next_and_char() !=
'-')
348 expects_next(
"CDATA[", 6);
356 expects_next(
"OCTYPE", 6);
357 skip_space_and_control();
367template<
typename HandlerT,
typename ConfigT>
368void sax_parser<HandlerT,ConfigT>::declaration(
const char* name_check)
370 assert(cur_char() ==
'?');
374 std::string_view decl_name;
376#if ORCUS_DEBUG_SAX_PARSER
377 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
380 if (name_check && decl_name != name_check)
382 std::ostringstream os;
383 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
387 m_handler.start_declaration(decl_name);
388 skip_space_and_control();
391 while (cur_char_checked() !=
'?')
394 skip_space_and_control();
396 if (next_char_checked() !=
'>')
399 m_handler.end_declaration(decl_name);
402#if ORCUS_DEBUG_SAX_PARSER
403 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
407template<
typename HandlerT,
typename ConfigT>
408void sax_parser<HandlerT,ConfigT>::cdata()
410 size_t len = available_size();
414 const char* p0 = mp_char;
415 size_t i = 0, match = 0;
416 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
430 else if (c ==
'>' && match == 2)
433 size_t cdata_len = i - 2;
434 m_handler.characters(std::string_view(p0, cdata_len),
false);
444template<
typename HandlerT,
typename ConfigT>
445void sax_parser<HandlerT,ConfigT>::doctype()
449 name(param.root_element);
450 skip_space_and_control();
453 size_t len = available_size();
457 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
461 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
464 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
468 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
473 skip_space_and_control();
476 value(param.fpi,
false);
478 has_char_throw(
"DOCTYPE section too short.");
479 skip_space_and_control();
480 has_char_throw(
"DOCTYPE section too short.");
482 if (cur_char() ==
'>')
485#if ORCUS_DEBUG_SAX_PARSER
486 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
488 m_handler.doctype(param);
494 value(param.uri,
false);
496 has_char_throw(
"DOCTYPE section too short.");
497 skip_space_and_control();
498 has_char_throw(
"DOCTYPE section too short.");
500 if (cur_char() !=
'>')
501 throw malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
503#if ORCUS_DEBUG_SAX_PARSER
504 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
506 m_handler.doctype(param);
510template<
typename HandlerT,
typename ConfigT>
511void sax_parser<HandlerT,ConfigT>::characters()
513 const char* p0 = mp_char;
514 for (; has_char(); next())
516 if (cur_char() ==
'<')
519 if (cur_char() ==
'&')
524 buf.append(p0, mp_char-p0);
525 characters_with_encoded_char(buf);
527 m_handler.characters(std::string_view{},
false);
529 m_handler.characters(buf.str(),
true);
536 std::string_view val(p0, mp_char-p0);
537 m_handler.characters(val,
false);
541template<
typename HandlerT,
typename ConfigT>
542void sax_parser<HandlerT,ConfigT>::attribute()
545 attribute_name(attr.ns, attr.name);
547#if ORCUS_DEBUG_SAX_PARSER
548 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
551 skip_space_and_control();
553 char c = cur_char_checked();
556 std::ostringstream os;
557 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
562 skip_space_and_control();
564 attr.transient = value(attr.value,
true);
569#if ORCUS_DEBUG_SAX_PARSER
570 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
573 m_handler.attribute(attr);
Definition cell_buffer.hpp:22
Definition parser_base.hpp:23
Definition sax_parser_base.hpp:108
Definition sax_parser.hpp:31
void end_declaration(std::string_view decl)
Definition sax_parser.hpp:60
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition sax_parser.hpp:38
void attribute(const orcus::sax::parser_attribute &attr)
Definition sax_parser.hpp:112
void characters(std::string_view val, bool transient)
Definition sax_parser.hpp:99
void start_declaration(std::string_view decl)
Definition sax_parser.hpp:50
void end_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:80
void start_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:70
Definition sax_parser_base.hpp:37
Definition sax_parser_base.hpp:96
Definition sax_parser_base.hpp:77
Definition sax_parser.hpp:21
static constexpr uint8_t baseline_version
Definition sax_parser.hpp:27