Orcus
Loading...
Searching...
No Matches
sax_ns_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10
11#include "sax_parser.hpp"
12#include "xml_namespace.hpp"
13
14#include <unordered_set>
15#include <vector>
16#include <algorithm>
17
18namespace orcus {
19
21{
23 xmlns_id_t ns;
25 std::string_view ns_alias;
27 std::string_view name;
29 std::ptrdiff_t begin_pos;
31 std::ptrdiff_t end_pos;
32};
33
35{
37 xmlns_id_t ns;
39 std::string_view ns_alias;
41 std::string_view name;
43 std::string_view value;
46};
47
48namespace sax { namespace detail {
49
/**
 * Pair of a namespace string and a local name, used as the key type for
 * detecting duplicate attributes within a single element.
 *
 * Both members are views; the referenced character data must outlive the
 * entity_name instance.
 */
struct entity_name
{
    std::string_view ns;
    std::string_view name;

    entity_name(std::string_view _ns, std::string_view _name) :
        ns(_ns), name(_name) {}

    /** Two names are equal only when both the namespace and the name match. */
    bool operator== (const entity_name& other) const
    {
        return other.ns == ns && other.name == name;
    }

    /** Hash functor that allows entity_name to be stored in unordered containers. */
    struct hash
    {
        size_t operator() (const entity_name& v) const
        {
            std::hash<std::string_view> hasher;

            // Combine the two hashes in an order-sensitive way.  A plain sum
            // is commutative, which makes (ns="a", name="b") always collide
            // with (ns="b", name="a").
            size_t seed = hasher(v.ns);
            seed ^= hasher(v.name) + static_cast<size_t>(0x9e3779b9) + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
};
72
73typedef std::unordered_set<std::string_view> ns_keys_type;
74typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
75
76struct elem_scope
77{
78 xmlns_id_t ns;
79 std::string_view name;
80 ns_keys_type ns_keys;
81
82 elem_scope() {}
83 elem_scope(const elem_scope&) = delete;
84 elem_scope(elem_scope&& other) = default;
85};
86
87using elem_scopes_type = std::vector<elem_scope>;
88
89}} // namespace sax::detail
90
92{
93public:
100 {
101 (void)dtd;
102 }
103
111 void start_declaration(std::string_view decl)
112 {
113 (void)decl;
114 }
115
121 void end_declaration(std::string_view decl)
122 {
123 (void)decl;
124 }
125
132 {
133 (void)elem;
134 }
135
142 {
143 (void)elem;
144 }
145
160 void characters(std::string_view val, bool transient)
161 {
162 (void)val;
163 (void)transient;
164 }
165
177 void attribute(std::string_view name, std::string_view val)
178 {
179 (void)name;
180 (void)val;
181 }
182
192 {
193 (void)attr;
194 }
195};
196
211template<typename HandlerT>
212class sax_ns_parser
213{
214public:
215 typedef HandlerT handler_type;
216
217 sax_ns_parser(std::string_view content, xmlns_context& ns_cxt, handler_type& handler);
218 ~sax_ns_parser() = default;
219
226 void parse();
227
228private:
233 class handler_wrapper
234 {
235 sax::detail::elem_scopes_type m_scopes;
236 sax::detail::ns_keys_type m_ns_keys;
237 sax::detail::entity_names_type m_attrs;
238
241
242 xmlns_context& m_ns_cxt;
243 handler_type& m_handler;
244
245 bool m_declaration;
246
247 public:
248 handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
249
250 void doctype(const sax::doctype_declaration& dtd)
251 {
252 m_handler.doctype(dtd);
253 }
254
255 void start_declaration(std::string_view name)
256 {
257 m_declaration = true;
258 m_handler.start_declaration(name);
259 }
260
261 void end_declaration(std::string_view name)
262 {
263 m_declaration = false;
264 m_handler.end_declaration(name);
265 }
266
267 void start_element(const sax::parser_element& elem)
268 {
269 m_scopes.emplace_back();
270 sax::detail::elem_scope& scope = m_scopes.back();
271 scope.ns = m_ns_cxt.get(elem.ns);
272 scope.name = elem.name;
273 scope.ns_keys.swap(m_ns_keys);
274
275 m_elem.ns = scope.ns;
276 m_elem.ns_alias = elem.ns;
277 m_elem.name = scope.name;
278 m_elem.begin_pos = elem.begin_pos;
279 m_elem.end_pos = elem.end_pos;
280 m_handler.start_element(m_elem);
281
282 m_attrs.clear();
283 }
284
285 void end_element(const sax::parser_element& elem)
286 {
287 sax::detail::elem_scope& scope = m_scopes.back();
288 if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
289 throw malformed_xml_error("mis-matching closing element.", -1);
290
291 m_elem.ns = scope.ns;
292 m_elem.ns_alias = elem.ns;
293 m_elem.name = scope.name;
294 m_elem.begin_pos = elem.begin_pos;
295 m_elem.end_pos = elem.end_pos;
296 m_handler.end_element(m_elem);
297
298 // Pop all namespaces declared in this scope.
299 for (const std::string_view& key : scope.ns_keys)
300 m_ns_cxt.pop(key);
301
302 m_scopes.pop_back();
303 }
304
305 void characters(std::string_view val, bool transient)
306 {
307 m_handler.characters(val, transient);
308 }
309
310 void attribute(const sax::parser_attribute& attr)
311 {
312 if (m_declaration)
313 {
314 // XML declaration attribute. Pass it through to the handler without namespace.
315 m_handler.attribute(attr.name, attr.value);
316 return;
317 }
318
319 if (m_attrs.count(sax::detail::entity_name(attr.ns, attr.name)) > 0)
320 throw malformed_xml_error(
321 "You can't define two attributes of the same name in the same element.", -1);
322
323 m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name));
324
325 if (attr.ns.empty() && attr.name == "xmlns")
326 {
327 // Default namespace
328 m_ns_cxt.push(std::string_view{}, attr.value);
329 m_ns_keys.insert(std::string_view{});
330 return;
331 }
332
333 if (attr.ns == "xmlns")
334 {
335 // Namespace alias
336 if (!attr.name.empty())
337 {
338 m_ns_cxt.push(attr.name, attr.value);
339 m_ns_keys.insert(attr.name);
340 }
341 return;
342 }
343
344 m_attr.ns = attr.ns.empty() ? XMLNS_UNKNOWN_ID : m_ns_cxt.get(attr.ns);
345 m_attr.ns_alias = attr.ns;
346 m_attr.name = attr.name;
347 m_attr.value = attr.value;
348 m_attr.transient = attr.transient;
349 m_handler.attribute(m_attr);
350 }
351 };
352
353private:
354 handler_wrapper m_wrapper;
355 sax_parser<handler_wrapper> m_parser;
356};
357
358template<typename HandlerT>
359sax_ns_parser<HandlerT>::sax_ns_parser(
360 std::string_view content, xmlns_context& ns_cxt, handler_type& handler) :
361 m_wrapper(ns_cxt, handler), m_parser(content, m_wrapper)
362{
363}
364
365template<typename HandlerT>
367{
368 m_parser.parse();
369}
370
371}
372
373#endif
374/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition sax_ns_parser.hpp:92
void attribute(std::string_view name, std::string_view val)
Definition sax_ns_parser.hpp:177
void attribute(const orcus::sax_ns_parser_attribute &attr)
Definition sax_ns_parser.hpp:191
void start_element(const orcus::sax_ns_parser_element &elem)
Definition sax_ns_parser.hpp:131
void characters(std::string_view val, bool transient)
Definition sax_ns_parser.hpp:160
void start_declaration(std::string_view decl)
Definition sax_ns_parser.hpp:111
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition sax_ns_parser.hpp:99
void end_element(const orcus::sax_ns_parser_element &elem)
Definition sax_ns_parser.hpp:141
void end_declaration(std::string_view decl)
Definition sax_ns_parser.hpp:121
void parse()
Definition sax_ns_parser.hpp:366
Definition xml_namespace.hpp:100
Definition sax_ns_parser.hpp:64
Definition sax_parser_base.hpp:37
Definition sax_ns_parser.hpp:35
std::string_view value
Definition sax_ns_parser.hpp:43
xmlns_id_t ns
Definition sax_ns_parser.hpp:37
bool transient
Definition sax_ns_parser.hpp:45
std::string_view name
Definition sax_ns_parser.hpp:41
std::string_view ns_alias
Definition sax_ns_parser.hpp:39
Definition sax_ns_parser.hpp:21
std::ptrdiff_t end_pos
Definition sax_ns_parser.hpp:31
xmlns_id_t ns
Definition sax_ns_parser.hpp:23
std::string_view name
Definition sax_ns_parser.hpp:27
std::string_view ns_alias
Definition sax_ns_parser.hpp:25
std::ptrdiff_t begin_pos
Definition sax_ns_parser.hpp:29