Orcus
Loading...
Searching...
No Matches
css_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9#define INCLUDED_ORCUS_CSS_PARSER_HPP
10
11#define ORCUS_DEBUG_CSS 0
12
13#include "parser_global.hpp"
14#include "css_parser_base.hpp"
15
16#include <cassert>
17#include <algorithm>
18
19#if ORCUS_DEBUG_CSS
20#include <iostream>
21using std::cout;
22using std::endl;
23#endif
24
25namespace orcus {
26
32{
33public:
/**
 * Receives the name of an at-rule, i.e. the identifier the parser reads
 * right after a '@' character.  This implementation ignores it.
 *
 * @param name at-rule name without the leading '@'.
 */
void at_rule_name([[maybe_unused]] std::string_view name)
{
}
43
/**
 * Receives the type part of a simple selector: the identifier found at
 * the start of a simple selector that does not begin with '.' or '#'.
 * This implementation ignores it.
 *
 * @param type selector type name.
 */
void simple_selector_type([[maybe_unused]] std::string_view type)
{
}
61
/**
 * Receives a class name of a simple selector: the identifier the parser
 * reads right after a '.' character.  This implementation ignores it.
 *
 * @param cls class name without the leading '.'.
 */
void simple_selector_class([[maybe_unused]] std::string_view cls)
{
}
79
95 void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
96 {
97 (void)pe;
98 }
99
114 void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
115 {
116 (void)pc;
117 }
118
/**
 * Receives the ID of a simple selector: the identifier the parser reads
 * right after a '#' character.  This implementation ignores it.
 *
 * @param id ID without the leading '#'.
 */
void simple_selector_id([[maybe_unused]] std::string_view id)
{
}
136
144
/**
 * Signals the end of a selector.  The parser calls this after a ','
 * separating two selectors, and again when it reaches the '{' that opens
 * the block.  This implementation does nothing.
 */
void end_selector()
{
}
152
167 void combinator(orcus::css::combinator_t combinator)
168 {
169 (void)combinator;
170 }
171
/**
 * Receives the name of a property at the start of each property
 * declaration.  This implementation ignores it.
 *
 * @param name property name.
 */
void property_name([[maybe_unused]] std::string_view name)
{
}
181
/**
 * Receives one plain property value: either the content of a quoted
 * literal, or an unquoted value that is not a function call.  This
 * implementation ignores it.
 *
 * @param value value text.
 */
void value([[maybe_unused]] std::string_view value)
{
}
191
/**
 * Receives the three components of an rgb() function value.  This
 * implementation ignores them.
 *
 * @param red first component.
 * @param green second component.
 * @param blue third component.
 */
void rgb(
    [[maybe_unused]] uint8_t red,
    [[maybe_unused]] uint8_t green,
    [[maybe_unused]] uint8_t blue)
{
}
203
/**
 * Receives the components of an rgba() function value.  This
 * implementation ignores them.
 *
 * @param red first component.
 * @param green second component.
 * @param blue third component.
 * @param alpha alpha component; the parser clamps it to [0.0, 1.0]
 *              before passing it in.
 */
void rgba(
    [[maybe_unused]] uint8_t red,
    [[maybe_unused]] uint8_t green,
    [[maybe_unused]] uint8_t blue,
    [[maybe_unused]] double alpha)
{
}
217
/**
 * Receives the components of an hsl() function value.  This
 * implementation ignores them.
 *
 * NOTE(review): the parser computes hue as a double clamped to
 * [0, 360] and passes it straight in, so a hue above 255 cannot be
 * represented by this uint8_t parameter - confirm the intended range.
 *
 * @param hue hue component.
 * @param sat saturation component; the parser clamps it to [0, 100].
 * @param light lightness component; the parser clamps it to [0, 100].
 */
void hsl(
    [[maybe_unused]] uint8_t hue,
    [[maybe_unused]] uint8_t sat,
    [[maybe_unused]] uint8_t light)
{
}
229
/**
 * Receives the components of an hsla() function value.  This
 * implementation ignores them.
 *
 * NOTE(review): the parser computes hue as a double clamped to
 * [0, 360] and passes it straight in, so a hue above 255 cannot be
 * represented by this uint8_t parameter - confirm the intended range.
 *
 * @param hue hue component.
 * @param sat saturation component; the parser clamps it to [0, 100].
 * @param light lightness component; the parser clamps it to [0, 100].
 * @param alpha alpha component; the parser clamps it to [0.0, 1.0].
 */
void hsla(
    [[maybe_unused]] uint8_t hue,
    [[maybe_unused]] uint8_t sat,
    [[maybe_unused]] uint8_t light,
    [[maybe_unused]] double alpha)
{
}
243
/**
 * Receives the argument of a url() function value, either the content
 * of a quoted literal or the unquoted text.  This implementation
 * ignores it.
 *
 * @param url URL text.
 */
void url([[maybe_unused]] std::string_view url)
{
}
253
/**
 * Called once by css_parser::parse() before the first rule is parsed.
 * This implementation does nothing.
 */
void begin_parse()
{
}
258
/**
 * Called once by css_parser::parse() after the whole stream has been
 * consumed.  This implementation does nothing.
 */
void end_parse()
{
}
263
/**
 * Called when the parser reaches the '{' that opens a property block,
 * right after end_selector().  This implementation does nothing.
 */
void begin_block()
{
}
269
/**
 * Called when the parser reaches the '}' that closes a property block.
 * This implementation does nothing.
 */
void end_block()
{
}
275
287
/**
 * Called after all values of a single property have been reported.
 * This implementation does nothing.
 */
void end_property()
{
}
292};
293
300template<typename HandlerT>
301class css_parser : public css::parser_base
302{
303public:
304 typedef HandlerT handler_type;
305
306 css_parser(std::string_view content, handler_type& hdl);
307 void parse();
308
309private:
310 // Handlers - at the time a handler is called the current position is
311 // expected to point to the first unprocessed non-blank character, and
312 // each handler must set the current position to the next unprocessed
313 // non-blank character when it finishes.
314 void rule();
315 void at_rule_name();
316 void simple_selector_name();
317 void property_name();
318 void property();
319 void quoted_value(char c);
320 void value();
321 void function_value(std::string_view v);
322 void function_rgb(bool alpha);
323 void function_hsl(bool alpha);
324 void function_url();
325 void name_sep();
326 void property_sep();
327 void block();
328
329 handler_type& m_handler;
330};
331
332template<typename _Handler>
333css_parser<_Handler>::css_parser(std::string_view content, handler_type& hdl) :
334 css::parser_base(content), m_handler(hdl) {}
335
336template<typename _Handler>
337void css_parser<_Handler>::parse()
338{
339 shrink_stream();
340
341#if ORCUS_DEBUG_CSS
342 std::cout << "compressed: '";
343 const char* p = mp_char;
344 for (; p != mp_end; ++p)
345 std::cout << *p;
346 std::cout << "'" << std::endl;
347#endif
348 m_handler.begin_parse();
349 while (has_char())
350 rule();
351 m_handler.end_parse();
352}
353
354template<typename _Handler>
355void css_parser<_Handler>::rule()
356{
357 // <selector name> , ... , <selector name> <block>
358 while (has_char())
359 {
360 if (skip_comment())
361 continue;
362
363 char c = cur_char();
364 if (is_alpha(c))
365 {
366 simple_selector_name();
367 continue;
368 }
369
370 switch (c)
371 {
372 case '>':
373 set_combinator(c, css::combinator_t::direct_child);
374 break;
375 case '+':
376 set_combinator(c, css::combinator_t::next_sibling);
377 break;
378 case '.':
379 case '#':
380 case '@':
381 simple_selector_name();
382 break;
383 case ',':
384 name_sep();
385 break;
386 case '{':
387 reset_before_block();
388 block();
389 break;
390 default:
391 parse_error::throw_with("rule: failed to parse '", c, "'", offset());
392 }
393 }
394}
395
396template<typename _Handler>
397void css_parser<_Handler>::at_rule_name()
398{
399 assert(has_char());
400 assert(cur_char() == '@');
401 next();
402 char c = cur_char();
403 if (!is_alpha(c))
404 throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", offset());
405
406 const char* p;
407 size_t len;
408 identifier(p, len);
409 skip_blanks();
410
411 m_handler.at_rule_name({p, len});
412#if ORCUS_DEBUG_CSS
413 std::string foo(p, len);
414 std::cout << "at-rule name: " << foo.c_str() << std::endl;
415#endif
416}
417
418template<typename _Handler>
419void css_parser<_Handler>::simple_selector_name()
420{
421 assert(has_char());
422 char c = cur_char();
423 if (c == '@')
424 {
425 // This is the name of an at-rule.
426 at_rule_name();
427 return;
428 }
429
430 if (m_simple_selector_count)
431 {
432#if ORCUS_DEBUG_CSS
433 cout << "combinator: " << m_combinator << endl;
434#endif
435 m_handler.combinator(m_combinator);
436 m_combinator = css::combinator_t::descendant;
437 }
438 assert(is_alpha(c) || c == '.' || c == '#');
439
440 const char* p = nullptr;
441 size_t n = 0;
442
443#if ORCUS_DEBUG_CSS
444 cout << "simple_selector_name: (" << m_simple_selector_count << ")";
445#endif
446
447 if (c != '.' && c != '#')
448 {
449 identifier(p, n);
450#if ORCUS_DEBUG_CSS
451 std::string s(p, n);
452 cout << " type=" << s;
453#endif
454 m_handler.simple_selector_type({p, n});
455 }
456
457 bool in_loop = true;
458 while (in_loop && has_char())
459 {
460 switch (cur_char())
461 {
462 case '.':
463 {
464 next();
465 identifier(p, n);
466 m_handler.simple_selector_class({p, n});
467#if ORCUS_DEBUG_CSS
468 std::string s(p, n);
469 std::cout << " class=" << s;
470#endif
471 }
472 break;
473 case '#':
474 {
475 next();
476 identifier(p, n);
477 m_handler.simple_selector_id({p, n});
478#if ORCUS_DEBUG_CSS
479 std::string s(p, n);
480 std::cout << " id=" << s;
481#endif
482 }
483 break;
484 case ':':
485 {
486 // This could be either a pseudo element or pseudo class.
487 next();
488 if (cur_char() == ':')
489 {
490 // pseudo element.
491 next();
492 identifier(p, n);
493 css::pseudo_element_t elem = css::to_pseudo_element({p, n});
494 if (!elem)
495 parse_error::throw_with(
496 "selector_name: unknown pseudo element '", {p, n}, "'", offset());
497
498 m_handler.simple_selector_pseudo_element(elem);
499 }
500 else
501 {
502 // pseudo class (or pseudo element in the older version of CSS).
503 identifier(p, n);
504 css::pseudo_class_t pc = css::to_pseudo_class({p, n});
505 if (!pc)
506 parse_error::throw_with(
507 "selector_name: unknown pseudo class '", {p, n}, "'", offset());
508
509 m_handler.simple_selector_pseudo_class(pc);
510 }
511 }
512 break;
513 default:
514 in_loop = false;
515 }
516 }
517
518 m_handler.end_simple_selector();
519 skip_comments_and_blanks();
520
521 ++m_simple_selector_count;
522
523#if ORCUS_DEBUG_CSS
524 std::cout << std::endl;
525#endif
526}
527
528template<typename _Handler>
529void css_parser<_Handler>::property_name()
530{
531 // <identifier>
532
533 assert(has_char());
534 char c = cur_char();
535 if (!is_alpha(c) && c != '.')
536 parse_error::throw_with(
537 "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset());
538
539 const char* p;
540 size_t len;
541 identifier(p, len);
542 skip_comments_and_blanks();
543
544 m_handler.property_name({p, len});
545#if ORCUS_DEBUG_CSS
546 std::string foo(p, len);
547 std::cout << "property name: " << foo.c_str() << std::endl;
548#endif
549}
550
551template<typename _Handler>
552void css_parser<_Handler>::property()
553{
554 // <property name> : <value> , ... , <value>
555
556 m_handler.begin_property();
557 property_name();
558 if (cur_char() != ':')
559 throw parse_error("property: ':' expected.", offset());
560 next();
561 skip_comments_and_blanks();
562
563 bool in_loop = true;
564 while (in_loop && has_char())
565 {
566 value();
567 char c = cur_char();
568 switch (c)
569 {
570 case ',':
571 {
572 // separated by commas.
573 next();
574 skip_comments_and_blanks();
575 }
576 break;
577 case ';':
578 case '}':
579 in_loop = false;
580 break;
581 default:
582 ;
583 }
584 }
585
586 skip_comments_and_blanks();
587 m_handler.end_property();
588}
589
590template<typename _Handler>
591void css_parser<_Handler>::quoted_value(char c)
592{
593 // Parse until the the end quote is reached.
594 const char* p = nullptr;
595 size_t len = 0;
596 literal(p, len, c);
597 next();
598 skip_blanks();
599
600 m_handler.value({p, len});
601#if ORCUS_DEBUG_CSS
602 std::string foo(p, len);
603 std::cout << "quoted value: " << foo.c_str() << std::endl;
604#endif
605}
606
607template<typename _Handler>
608void css_parser<_Handler>::value()
609{
610 assert(has_char());
611 char c = cur_char();
612 if (c == '"' || c == '\'')
613 {
614 quoted_value(c);
615 return;
616 }
617
618 std::string_view v = parse_value();
619 if (v.empty())
620 return;
621
622 if (cur_char() == '(')
623 {
624 function_value(v);
625 return;
626 }
627
628 m_handler.value(v);
629
630 skip_comments_and_blanks();
631
632#if ORCUS_DEBUG_CSS
633 std::cout << "value: " << v << std::endl;
634#endif
635}
636
637template<typename _Handler>
638void css_parser<_Handler>::function_value(std::string_view v)
639{
640 assert(cur_char() == '(');
641 css::property_function_t func = css::to_property_function(v);
642 if (func == css::property_function_t::unknown)
643 parse_error::throw_with("function_value: unknown function '", v, "'", offset());
644
645 // Move to the first character of the first argument.
646 next();
647 skip_comments_and_blanks();
648
649 switch (func)
650 {
651 case css::property_function_t::rgb:
652 function_rgb(false);
653 break;
654 case css::property_function_t::rgba:
655 function_rgb(true);
656 break;
657 case css::property_function_t::hsl:
658 function_hsl(false);
659 break;
660 case css::property_function_t::hsla:
661 function_hsl(true);
662 break;
663 case css::property_function_t::url:
664 function_url();
665 break;
666 default:
667 parse_error::throw_with("function_value: unhandled function '", v, "'", offset());
668 }
669
670 char c = cur_char();
671 if (c != ')')
672 parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset());
673
674 next();
675 skip_comments_and_blanks();
676}
677
678template<typename _Handler>
679void css_parser<_Handler>::function_rgb(bool alpha)
680{
681 // rgb(num, num, num) rgba(num, num, num, float)
682
683 uint8_t vals[3];
684 uint8_t* p = vals;
685 const uint8_t* plast = p + 2;
686 char c = 0;
687
688 for (; ; ++p)
689 {
690 *p = parse_uint8();
691
692 skip_comments_and_blanks();
693
694 if (p == plast)
695 break;
696
697 c = cur_char();
698
699 if (c != ',')
700 parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
701
702 next();
703 skip_comments_and_blanks();
704 }
705
706 if (alpha)
707 {
708 c = cur_char();
709 if (c != ',')
710 parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
711
712 next();
713 skip_comments_and_blanks();
714
715 double alpha_val = parse_double_or_throw();
716
717 alpha_val = std::clamp(alpha_val, 0.0, 1.0);
718 m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
719 }
720 else
721 m_handler.rgb(vals[0], vals[1], vals[2]);
722
723#if ORCUS_DEBUG_CSS
724 std::cout << "rgb";
725 if (alpha)
726 std::cout << 'a';
727 std::cout << '(';
728 p = vals;
729 const uint8_t* pend = plast + 1;
730 for (; p != pend; ++p)
731 std::cout << ' ' << (int)*p;
732 std::cout << " )" << std::endl;
733#endif
734}
735
736template<typename _Handler>
737void css_parser<_Handler>::function_hsl(bool alpha)
738{
739 // hsl(num, percent, percent) hsla(num, percent, percent, float)
740
741 double hue = parse_double_or_throw(); // casted to uint8_t eventually.
742 hue = std::clamp(hue, 0.0, 360.0);
743 skip_comments_and_blanks();
744
745 char c = cur_char();
746 if (c != ',')
747 parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
748
749 next();
750 skip_comments_and_blanks();
751
752 double sat = parse_percent();
753 sat = std::clamp(sat, 0.0, 100.0);
754 skip_comments_and_blanks();
755
756 c = cur_char();
757 if (c != ',')
758 parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
759
760 next();
761 skip_comments_and_blanks();
762
763 double light = parse_percent();
764 light = std::clamp(light, 0.0, 100.0);
765 skip_comments_and_blanks();
766
767 if (!alpha)
768 {
769 m_handler.hsl(hue, sat, light);
770 return;
771 }
772
773 c = cur_char();
774 if (c != ',')
775 parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
776
777 next();
778 skip_comments_and_blanks();
779
780 double alpha_val = parse_double_or_throw();
781 alpha_val = std::clamp(alpha_val, 0.0, 1.0);
782 skip_comments_and_blanks();
783 m_handler.hsla(hue, sat, light, alpha_val);
784}
785
786template<typename _Handler>
787void css_parser<_Handler>::function_url()
788{
789 char c = cur_char();
790
791 if (c == '"' || c == '\'')
792 {
793 // Quoted URL value.
794 const char* p;
795 size_t len;
796 literal(p, len, c);
797 next();
798 skip_comments_and_blanks();
799 m_handler.url({p, len});
800#if ORCUS_DEBUG_CSS
801 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
802#endif
803 return;
804 }
805
806 // Unquoted URL value.
807 const char* p;
808 size_t len;
809 skip_to_or_blank(p, len, ")");
810 skip_comments_and_blanks();
811 m_handler.url({p, len});
812#if ORCUS_DEBUG_CSS
813 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
814#endif
815}
816
817template<typename _Handler>
818void css_parser<_Handler>::name_sep()
819{
820 assert(cur_char() == ',');
821#if ORCUS_DEBUG_CSS
822 std::cout << "," << std::endl;
823#endif
824 next();
825 skip_blanks();
826 m_handler.end_selector();
827}
828
829template<typename _Handler>
830void css_parser<_Handler>::property_sep()
831{
832#if ORCUS_DEBUG_CSS
833 std::cout << ";" << std::endl;
834#endif
835 next();
836 skip_comments_and_blanks();
837}
838
839template<typename _Handler>
840void css_parser<_Handler>::block()
841{
842 // '{' <property> ';' ... ';' <property> ';'(optional) '}'
843
844 assert(cur_char() == '{');
845#if ORCUS_DEBUG_CSS
846 std::cout << "{" << std::endl;
847#endif
848 m_handler.end_selector();
849 m_handler.begin_block();
850
851 next();
852 skip_comments_and_blanks();
853
854 // parse properties.
855 while (has_char())
856 {
857 property();
858 if (cur_char() != ';')
859 break;
860 property_sep();
861 if (cur_char() == '}')
862 // ';' after the last property. This is optional but allowed.
863 break;
864 }
865
866 if (cur_char() != '}')
867 throw parse_error("block: '}' expected.", offset());
868
869 m_handler.end_block();
870
871 next();
872 skip_comments_and_blanks();
873
874#if ORCUS_DEBUG_CSS
875 std::cout << "}" << std::endl;
876#endif
877}
878
879}
880
881#endif
882
883/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition css_parser_base.hpp:22
Definition css_parser.hpp:32
void url(std::string_view url)
Definition css_parser.hpp:249
void end_parse()
Definition css_parser.hpp:262
void end_block()
Definition css_parser.hpp:274
void property_name(std::string_view name)
Definition css_parser.hpp:177
void at_rule_name(std::string_view name)
Definition css_parser.hpp:39
void hsl(uint8_t hue, uint8_t sat, uint8_t light)
Definition css_parser.hpp:225
void end_property()
Definition css_parser.hpp:291
void begin_parse()
Definition css_parser.hpp:257
void end_selector()
Definition css_parser.hpp:151
void begin_block()
Definition css_parser.hpp:268
void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
Definition css_parser.hpp:95
void simple_selector_class(std::string_view cls)
Definition css_parser.hpp:75
void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
Definition css_parser.hpp:213
void rgb(uint8_t red, uint8_t green, uint8_t blue)
Definition css_parser.hpp:199
void begin_property()
Definition css_parser.hpp:286
void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
Definition css_parser.hpp:114
void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
Definition css_parser.hpp:239
void simple_selector_id(std::string_view id)
Definition css_parser.hpp:132
void combinator(orcus::css::combinator_t combinator)
Definition css_parser.hpp:167
void simple_selector_type(std::string_view type)
Definition css_parser.hpp:57
void value(std::string_view value)
Definition css_parser.hpp:187
void end_simple_selector()
Definition css_parser.hpp:143
Definition exception.hpp:94
Definition parser_base.hpp:23