class XMLScan::XMLScanner
Constants
- CDATAPattern
- DoctypePattern
- E_OPT_EXAMPLE
- InvalidEntityRef
- S_OPT_EXAMPLE
- SkipDTD
- XMLDeclPattern
Attributes
optkey[RW]
Public Class Methods
apply_option(instance, option)
click to toggle source
# File lib/xmlscan/scanner.rb, line 267
# Applies one named option to +instance+ by invoking its
# +apply_option_<option>+ method (via +__send__+, so private methods are
# reachable). Returns whatever that method returns.
def apply_option(instance, option)
  handler = "apply_option_#{option}"
  instance.__send__(handler)
end
new(visitor, *options)
click to toggle source
Calls superclass method
# File lib/xmlscan/scanner.rb, line 292
# Builds a scanner for +visitor+ through the superclass constructor and
# then applies every requested option to it. Returns the value of
# +apply_options+.
def new(visitor, *options)
  apply_options(super(visitor), options)
end
new(visitor)
click to toggle source
# File lib/xmlscan/scanner.rb, line 301
# Stores the visitor that receives scanner events and resets the
# decoration, the current source and the option key to nil.
def initialize(visitor)
  @visitor = visitor
  @decoration, @src, @optkey = nil, nil, nil
end
provided_options()
click to toggle source
# File lib/xmlscan/scanner.rb, line 259
# Lists the option names this class provides: every private instance
# method named +apply_option_<name>+ contributes +<name>+ (as a String).
def provided_options
  prefix = 'apply_option_'
  private_instance_methods.map(&:to_s)
    .select { |meth| meth.start_with?(prefix) }
    .map { |meth| meth[prefix.size..-1] }
end
Private Class Methods
apply_options(instance, options)
click to toggle source
# File lib/xmlscan/scanner.rb, line 271
# Applies each requested option to +instance+. The ancestor chain is walked
# and every ancestor that responds to +provided_options+ applies the
# requested options it provides. Options nobody provides raise.
#
# Returns +instance+.
# Raises ArgumentError naming the first option that no ancestor provides.
def apply_options(instance, options)
  pending = {}
  options.each { |opt| pending[opt.to_s] = true }

  ancestors.each do |klass|
    next unless klass.respond_to?(:provided_options)

    klass.provided_options.each do |name|
      next unless pending.include?(name)

      pending.delete(name)
      klass.apply_option(instance, name)
    end
  end

  raise ArgumentError, "undefined option `#{pending.keys[0]}'" unless pending.empty?

  instance
end
Public Instance Methods
lineno()
click to toggle source
# File lib/xmlscan/scanner.rb, line 322
# Line number reported by the current source, or nil when no source is set.
def lineno
  return @src unless @src

  @src.lineno
end
opt_encoding()
click to toggle source
# File lib/xmlscan/scanner.rb, line 310
# Looks up the entry of OptRegexp::RE_ENCODINGS for the current +optkey+.
def opt_encoding
  table = OptRegexp::RE_ENCODINGS
  table[optkey]
end
parse_document(src)
click to toggle source
# File lib/xmlscan/scanner.rb, line 1113
# Scans a whole document read from +src+. The source built by +make_source+
# is held in @src only while scanning and is always cleared afterwards,
# even when scanning raises. Returns self.
def parse_document(src)
  @src = make_source(src)
  scan_document
  self
ensure
  @src = nil
end
Also aliased as: parse
path()
click to toggle source
# File lib/xmlscan/scanner.rb, line 326
# Path reported by the current source, or nil when no source is set.
def path
  return @src unless @src

  @src.path
end
source()
click to toggle source
# File lib/xmlscan/scanner.rb, line 330
# Underlying source object reported by the current source, or nil when no
# source is set. @src is only assigned during parse_document, so it is
# guarded here the same way lineno and path guard it instead of raising
# NoMethodError on nil.
def source
  @src && @src.source
end
Private Instance Methods
apply_option_strict_char()
click to toggle source
# File lib/xmlscan/xmlchar.rb, line 216
# Handler for the +strict_char+ option: mixes the StrictChar module into
# this scanner instance only.
def apply_option_strict_char
  extend(StrictChar)
end
decorate(decoration)
click to toggle source
# File lib/xmlscan/scanner.rb, line 313
# Wraps the visitor in a Decoration on first use (the wrapper then becomes
# the visitor) and expands it with +decoration+. Later calls reuse the same
# wrapper. Returns the result of Decoration#expand.
def decorate(decoration)
  @decoration ||= (@visitor = Decoration.new(@visitor))
  @decoration.expand(decoration)
end
found_empty_etag()
click to toggle source
# File lib/xmlscan/scanner.rb, line 691
# Recovery for an empty end tag: reports a parse error and passes the
# literal text on as character data.
def found_empty_etag
  parse_error("parse error at `</'")
  on_chardata('</>')
end
found_empty_stag()
click to toggle source
# File lib/xmlscan/scanner.rb, line 726
# Recovery for an empty start tag: reports a parse error and passes the
# literal text on as character data.
def found_empty_stag
  parse_error("parse error at `<'")
  on_chardata('<>')
end
found_invalid_pubsys(pubsys)
click to toggle source
# File lib/xmlscan/scanner.rb, line 988
# Called when a DOCTYPE keyword is neither PUBLIC nor SYSTEM. Reports the
# error and returns 'SYSTEM' as the replacement; +pubsys+ itself is not
# used here.
def found_invalid_pubsys(pubsys)
  parse_error("`PUBLIC' or `SYSTEM' should be here")
  'SYSTEM'
end
found_stag_error(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 748
# Reports a parse error for the leading token of +s+ and returns the text
# that follows it, or nil when the whole of +s+ is the token. The token is
# a single leading `/', `=', `'' or `"', otherwise everything up to the
# next whitespace or one of those characters.
def found_stag_error(s)
  if (m = /\A[\/='"]/.match(s))
    tok = m[0]
    rest = m.post_match
  elsif (m = /(?=[ \t\n\r\/='"])/.match(s))
    tok = m.pre_match
    rest = m.post_match
  else
    tok = s
    rest = nil
  end
  parse_error "parse error at `#{tok}'"
  rest
end
found_unclosed_emptyelem(name)
click to toggle source
# File lib/xmlscan/scanner.rb, line 739
# Reports an empty-element tag that was not closed; the message depends on
# whether the source is positioned at another tag or not.
def found_unclosed_emptyelem(name)
  msg = if @src.tag_start?
          "unclosed empty element tag `#{name}' meets another tag"
        else
          "unclosed empty element tag `#{name}' meets EOF"
        end
  parse_error msg
end
found_unclosed_etag(name)
click to toggle source
# File lib/xmlscan/scanner.rb, line 683
# Reports an end tag that was not closed; the message depends on whether
# the source is positioned at another tag or not.
def found_unclosed_etag(name)
  msg = if @src.tag_start?
          "unclosed end tag `#{name}' meets another tag"
        else
          "unclosed end tag `#{name}' meets EOF"
        end
  parse_error msg
end
found_unclosed_stag(name)
click to toggle source
# File lib/xmlscan/scanner.rb, line 731
# Reports a start tag that was not closed; the message depends on whether
# the source is positioned at another tag or not.
def found_unclosed_stag(name)
  msg = if @src.tag_start?
          "unclosed start tag `#{name}' meets another tag"
        else
          "unclosed start tag `#{name}' meets EOF"
        end
  parse_error msg
end
get_until_qmark(str, qmark)
click to toggle source
# File lib/xmlscan/scanner.rb, line 890
# Keeps reading chunks from the source and appending them to +str+ until a
# chunk contains the quotation mark +qmark+.
#
# For each chunk, a '>' is appended to +str+ first unless the chunk starts
# with '<' or '>'; then the part before +qmark+ is appended (skipped for an
# empty chunk).
#
# Returns the text following +qmark+ in the chunk that contained it, or
# the source's falsy value when it runs out of chunks first.
def get_until_qmark(str, qmark)
  while true
    chunk = @src.get
    return chunk unless chunk

    lead = chunk[0]
    head, rest = chunk.split(qmark, 2)
    str << '>' if lead != ?< && lead != ?>
    str << head if lead
    return rest if rest
  end
end
make_source(src)
click to toggle source
# File lib/xmlscan/scanner.rb, line 1106
# Wraps the raw input +src+ in a Source object for scanning.
def make_source(src)
  Source.new(src)
end
on_attr_charref(code, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 473
# Forwards a decimal character reference found in an attribute value
# (+code+ plus any extra arguments) to the visitor.
def on_attr_charref(code, *a)
  @visitor.on_attr_charref(code, *a)
end
on_attr_charref_hex(code, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 477
# Forwards a hexadecimal character reference found in an attribute value
# (+code+ plus any extra arguments) to the visitor.
def on_attr_charref_hex(code, *a)
  @visitor.on_attr_charref_hex(code, *a)
end
on_attr_entityref(ref, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 469
# Forwards an entity reference found in an attribute value (+ref+ plus any
# extra arguments) to the visitor.
def on_attr_entityref(ref, *a)
  @visitor.on_attr_entityref(ref, *a)
end
on_attr_value(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 465
# Forwards a piece of literal attribute-value text (+str+ plus any extra
# arguments) to the visitor.
def on_attr_value(str, *a)
  @visitor.on_attr_value(str, *a)
end
on_attribute(name, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 461
# Forwards the start of an attribute (+name+ plus any extra arguments) to
# the visitor.
def on_attribute(name, *a)
  @visitor.on_attribute(name, *a)
end
on_attribute_end(name, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 481
# Forwards the end of an attribute (+name+ plus any extra arguments) to the
# visitor. The extra arguments are forwarded exactly once, like every other
# on_* forwarding method; the previous `*a, *a` passed them twice.
def on_attribute_end(name, *a)
  @visitor.on_attribute_end(name, *a)
end
on_cdata(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 407
# Forwards the content of a CDATA section (+str+ plus any extra arguments)
# to the visitor.
def on_cdata(str, *a)
  @visitor.on_cdata(str, *a)
end
on_chardata(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 403
# Forwards character data (+str+ plus any extra arguments) to the visitor.
def on_chardata(str, *a)
  @visitor.on_chardata(str, *a)
end
on_charref(code, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 419
# Forwards a decimal character reference in content (+code+ plus any extra
# arguments) to the visitor.
def on_charref(code, *a)
  @visitor.on_charref(code, *a)
end
on_charref_hex(code, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 423
# Forwards a hexadecimal character reference in content (+code+ plus any
# extra arguments) to the visitor.
def on_charref_hex(code, *a)
  @visitor.on_charref_hex(code, *a)
end
on_comment(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 395
# Forwards the text of a comment (+str+ plus any extra arguments) to the
# visitor.
def on_comment(str, *a)
  @visitor.on_comment(str, *a)
end
on_doctype(root, pubid, sysid, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 387
# Forwards a DOCTYPE declaration (root element name, public ID, system ID
# and any extra arguments) to the visitor.
def on_doctype(root, pubid, sysid, *a)
  @visitor.on_doctype(root, pubid, sysid, *a)
end
on_end_document(*a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 431
# Tells the visitor that the document has ended, forwarding all arguments.
def on_end_document(*a)
  @visitor.on_end_document(*a)
end
on_entityref(ref, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 415
# Forwards an entity reference in content (+ref+ plus any extra arguments)
# to the visitor.
def on_entityref(ref, *a)
  @visitor.on_entityref(ref, *a)
end
on_etag(name, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 411
# Forwards an end tag (+name+ plus any extra arguments) to the visitor.
def on_etag(name, *a)
  @visitor.on_etag(name, *a)
end
on_pi(target, pi, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 399
# Forwards a processing instruction (its target, its content and any extra
# arguments) to the visitor.
def on_pi(target, pi, *a)
  @visitor.on_pi(target, pi, *a)
end
on_prolog_space(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 391
# Forwards whitespace found in the prolog (+str+ plus any extra arguments)
# to the visitor.
def on_prolog_space(str, *a)
  @visitor.on_prolog_space(str, *a)
end
on_stag(name, *a)
click to toggle source
  <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
  ^     ^     ^  ^    ^    ^     ^  ^ ^ ^
  1     2     3  4    5    6     7  8 9 A

The following methods are called, with the following arguments, when the parser reaches each of the points marked above:

  1: on_stag             ('hoge')
  2: on_attribute        ('fuga')
  3: on_attr_value       ('foo')
  4: on_attr_entityref   ('bar')
  5: on_attr_charref     (38)
  6: on_attr_charref_hex (38)
  7: on_attr_value       ('foo')
  8: on_attribute_end    ('fuga')
  9: on_stag_end_empty   ('hoge')  or  on_stag_end ('hoge')
  A: on_chardata         ('HOGE')
# File lib/xmlscan/scanner.rb, line 457
# Forwards the beginning of a start tag (+name+ plus any extra arguments)
# to the visitor.
def on_stag(name, *a)
  @visitor.on_stag(name, *a)
end
on_stag_end(name, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 489
# Forwards the end of a (non-empty) start tag (+name+ plus any extra
# arguments) to the visitor.
def on_stag_end(name, *a)
  #STDERR << "ose #{name}, #{a.inspect}\n"
  @visitor.on_stag_end(name, *a)
end
on_stag_end_empty(name, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 485
# Forwards the end of an empty-element tag (+name+ plus any extra
# arguments) to the visitor.
def on_stag_end_empty(name, *a)
  @visitor.on_stag_end_empty(name, *a)
end
on_start_document(*a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 427
# Tells the visitor that a document is starting, forwarding all arguments.
def on_start_document(*a)
  @visitor.on_start_document(*a)
end
on_xmldecl()
click to toggle source
# File lib/xmlscan/scanner.rb, line 354
# Tells the visitor that an XML declaration has started.
def on_xmldecl
  @visitor.on_xmldecl()
end
on_xmldecl_encoding(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 371
# Forwards the `encoding' value of the XML declaration (+str+ plus any
# extra arguments) to the visitor.
def on_xmldecl_encoding(str, *a)
  @visitor.on_xmldecl_encoding(str, *a)
end
on_xmldecl_end(*a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 383
# Tells the visitor that the XML declaration has ended, forwarding all
# arguments.
def on_xmldecl_end(*a)
  @visitor.on_xmldecl_end(*a)
end
on_xmldecl_key(key, str)
click to toggle source
# File lib/xmlscan/scanner.rb, line 358
# Dispatches one `key = value' pair of the XML declaration. When the
# visitor responds to +on_xmldecl_<key>+, this scanner's own method of that
# name is called with +str+; otherwise the pair goes to +on_xmldecl_other+.
def on_xmldecl_key(key, str)
  handler = "on_xmldecl_#{key}"
  return send(handler, str) if @visitor.respond_to?(handler)

  send(:on_xmldecl_other, key, str)
end
on_xmldecl_other(name, value, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 379
# Forwards a `name = value' pair of the XML declaration that has no
# dedicated handler (plus any extra arguments) to the visitor.
def on_xmldecl_other(name, value, *a)
  @visitor.on_xmldecl_other(name, value, *a)
end
on_xmldecl_standalone(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 375
# Forwards the `standalone' value of the XML declaration (+str+ plus any
# extra arguments) to the visitor.
def on_xmldecl_standalone(str, *a)
  @visitor.on_xmldecl_standalone(str, *a)
end
on_xmldecl_version(str, *a)
click to toggle source
# File lib/xmlscan/scanner.rb, line 367
# Forwards the `version' value of the XML declaration (+str+ plus any extra
# arguments) to the visitor.
def on_xmldecl_version(str, *a)
  @visitor.on_xmldecl_version(str, *a)
end
parse_error(msg)
click to toggle source
# File lib/xmlscan/scanner.rb, line 337
# Reports a parse error message to the visitor.
def parse_error(msg)
  @visitor.parse_error(msg)
end
scan_attr_value(s)
click to toggle source
# File lib/xmlscan/scanner.rb 573 def scan_attr_value(s) # almostly copy & paste from scan_chardata 574 unless /&/ =~ s then 575 #STDERR << "no& attr_val #{s.inspect}, #{caller*"\n"}\n" if s == ?> 576 on_attr_value s 577 else 578 s = $` 579 on_attr_value s unless s.empty? 580 ref = nil 581 $'.split('&', -1).each { |s| 582 unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then 583 if InvalidEntityRef[@optkey] =~ s and not (ref = $`).strip.empty? 584 then 585 parse_error "reference to `#{ref}' doesn't end with `;'" 586 else 587 parse_error "`&' is not used for entity/character references" 588 on_attr_value('&' << s) 589 next 590 end 591 end 592 orig = ?& + (ref = $`) + ?; 593 s = $' 594 if /\A[^#]/ =~ ref then 595 on_attr_entityref ref, orig 596 elsif /\A#(\d+)\z/ =~ ref then 597 on_attr_charref $1.to_i, orig 598 elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then 599 on_attr_charref_hex $1.hex, orig 600 else 601 parse_error "invalid character reference `#{ref}'" 602 end 603 on_attr_value s unless s.empty? 604 } 605 end 606 end
scan_bang_tag(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 856
# Recovery for a `<!' construct that scan_content does not recognise:
# reports a parse error and passes the text on as character data, with `>'
# appended when the source reports a closing tag.
def scan_bang_tag(s)
  parse_error("parse error at `<!'")
  closed = @src.close_tag
  s << '>' if closed
  on_chardata(s)
end
scan_cdata(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 667
# Collects a CDATA section whose first chunk (after `<![CDATA[') is +s+.
# Chunks are appended to +s+ in place until the text matches
# CDATAPattern[@optkey] and the source reports a closing tag; the trailing
# `]]' is then removed and the content goes to on_cdata. If the source runs
# out first, a parse error is reported and what was collected is delivered.
def scan_cdata(s)
  content = s
  terminator = CDATAPattern[@optkey]
  until terminator =~ content && @src.close_tag
    chunk = @src.get_plain
    unless chunk
      parse_error "unterminated CDATA section meets EOF"
      return on_cdata(content)
    end
    content << chunk
  end
  2.times { content.chop! } # remove ']]'
  on_cdata(content)
end
scan_chardata(s)
click to toggle source
# File lib/xmlscan/scanner.rb 532 def scan_chardata(s) 533 while true 534 unless /&/ =~ s then 535 on_chardata s 536 else 537 s = $` 538 on_chardata s unless s.empty? 539 #orig = $'.sub(/(?=;).*$/,'') 540 ref = nil 541 $'.split('&', -1).each { |s| 542 unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then 543 if InvalidEntityRef[@optkey] =~ s and not (ref = $`).strip.empty? 544 then 545 parse_error "reference to `#{ref}' doesn't end with `;'" 546 else 547 parse_error "`&' is not used for entity/character references" 548 on_chardata '&'+s 549 next 550 end 551 end 552 orig = ?& + (ref = $`) + ?; 553 s = $' 554 if /\A[^#]/ =~ ref then 555 on_entityref ref, orig 556 elsif /\A#(\d+)\z/ =~ ref then 557 on_charref $1.to_i, orig 558 elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then 559 on_charref_hex $1.hex, orig 560 else 561 parse_error "invalid character reference `#{ref}'" 562 end 563 on_chardata s unless s.empty? 564 } 565 end 566 s = @src.get_text 567 break unless s 568 s = '>' << s unless s == '>' 569 end 570 end
scan_comment(s)
click to toggle source
# File lib/xmlscan/scanner.rb 609 def scan_comment(s) 610 s[0,4] = '' # remove `<!--' 611 comm = '' 612 until /--/ =~ s 613 comm << s 614 s = @src.get_plain 615 unless s then 616 parse_error "unterminated comment meets EOF" 617 return on_comment(comm) 618 end 619 end 620 comm << $` 621 until (s = $').empty? and @src.close_tag 622 if s == '-' and @src.close_tag then # ---> 623 parse_error "comment ending in `--->' is not allowed" 624 comm << s 625 break 626 end 627 parse_error "comment includes `--'" 628 comm << '--' 629 until /--/ =~ s # copy & paste for performance 630 comm << s 631 s = @src.get_plain 632 unless s then 633 parse_error "unterminated comment meets EOF" 634 return on_comment(comm) 635 end 636 end 637 comm << $` 638 end 639 on_comment comm 640 end
scan_content(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 863
# Main content loop: dispatches each chunk, starting with +s+, to the
# matching scan_* method according to its first characters, then fetches the
# next chunk from the source until there is none left.
def scan_content(s)
  src = @src # for speed
  while s
    if s[0] != ?<
      scan_chardata s
    else
      case s[1]
      when ?/
        scan_etag s
      when ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif (m = /\A<!\[CDATA\[/.match(s))
          scan_cdata m.post_match
        else
          scan_bang_tag s
        end
      when ??
        scan_pi s
      else
        scan_stag s
      end
    end
    s = src.get
  end
end
scan_doctype(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 996
# Scans a DOCTYPE declaration; +s+ is the text following `<!DOCTYPE'.
#
# Tokens are consumed with DoctypePattern, which captures a name, a quoted
# literal or a delimiter. The first name is the root element, the second
# must be PUBLIC or SYSTEM, and the literals that follow are the external
# IDs. The result is delivered as on_doctype(root, pubid, sysid); when a
# `[' is met, the internal DTD subset is then handed to scan_internal_dtd.
#
# The pattern is selected with @optkey, like every other option-dependent
# pattern in this class (the previous code read @opt, which is never
# assigned).
def scan_doctype(s)
  root = syspub = sysid = pubid = nil
  internal_dtd = false
  re = DoctypePattern[@optkey]
  begin
    if re =~ s then
      name, str, delim, s = $1, $2, $3, $'
      if name then
        if not root then
          root = name
        elsif not syspub then
          unless name == 'PUBLIC' or name == 'SYSTEM' then
            name = found_invalid_pubsys(name)
          end
          syspub = name
        else
          parse_error "parse error at `#{name}'"
        end
      elsif str then
        qmark = str.slice!(0,1)   # remove quotation marks
        unless syspub then
          # A literal before PUBLIC/SYSTEM: report it and rescan its content.
          parse_error "parse error at `#{qmark}'"
          s = str << s
        else
          if str[-1] == qmark[0] then
            str.chop!
          else
            # The literal continues in later chunks of the source.
            s = get_until_qmark(str, qmark) || ''
          end
          if not sysid then
            sysid = str
          elsif not pubid and syspub == 'PUBLIC' then
            pubid = sysid
            sysid = str
          else
            parse_error "too many external ID literals in DOCTYPE"
          end
        end
      elsif delim == '[' then
        internal_dtd = true
        break
      else
        parse_error "parse error at `#{delim}'"
      end
    else
      s = ''
    end
    if s.empty? then
      break if @src.close_tag
      s = @src.get_plain
    end
  end while s
  parse_error "unterminated DOCTYPE declaration meets EOF" unless s
  unless root then
    parse_error "no root element is specified in DOCTYPE"
  end
  if syspub and not sysid then
    parse_error "too few external ID literals in DOCTYPE"
  end
  if syspub == 'PUBLIC' and not pubid then
    # PUBLIC with a single literal: that literal is the public ID.
    pubid, sysid = sysid, nil
  end
  on_doctype root, pubid, sysid
  scan_internal_dtd s if internal_dtd
end
scan_document()
click to toggle source
# File lib/xmlscan/scanner.rb, line 1098
# Scans one whole document from the current source: signals the start,
# prepares the source, scans from the prolog on with the first chunk, and
# signals the end. Returns the result of on_end_document.
def scan_document
  on_start_document('')
  @src.prepare
  first_chunk = @src.get
  scan_prolog(first_chunk)
  on_end_document('')
end
scan_etag(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 697
# Scans an end tag; +s+ is the chunk starting with `</' (modified in place).
#
# A well-formed tag is delivered as on_etag(name, original_text). Malformed
# input (`</>', `</' followed by nothing or by whitespace) is reported as a
# parse error and passed on as character data. Whitespace followed by more
# text inside the tag is reported and the rest of the tag is skipped.
def scan_etag(s)
  orig = "#{s}>"
  s[0, 2] = '' # remove '</'

  if s.empty?
    return found_empty_etag if @src.close_tag # </>

    # </< or </[EOF]
    parse_error "parse error at `</'"
    s << '>' if @src.close_tag
    return on_chardata('</' << s)
  end

  if (m = /[ \t\n\r]+/.match(s))
    name = m.pre_match
    trailing = m.post_match
    if name.empty? # </ tag
      parse_error "parse error at `</'"
      s << '>' if @src.close_tag
      return on_chardata('</' + s)
    end
    unless trailing.empty? # </ta g
      parse_error "illegal whitespace is found within end tag `#{name}'"
      nil while @src.get_tag
    end
    s = name
  end

  found_unclosed_etag s unless @src.close_tag # </tag< or </tag[EOF]
  on_etag s, orig
end
scan_internal_dtd(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 982
# Handles an internal DTD subset by warning that it is unsupported and
# skipping over it.
def scan_internal_dtd(s)
  warning("internal DTD subset is not supported")
  skip_internal_dtd(s)
end
scan_pi(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 643
# Scans a processing instruction; +s+ is the chunk starting with `<?'.
#
# Without a valid target directly after `<?', a parse error is reported and
# the text is passed on as character data. Otherwise chunks are appended
# until the content ends in `?' and the tag closes, and the result is
# delivered as on_pi(target, content). If the source runs out first, a
# parse error is reported and what was collected is delivered.
def scan_pi(s)
  m = /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/.match(s)
  unless m
    parse_error "parse error at `<?'"
    s << '>' if @src.close_tag
    return on_chardata(s)
  end

  target = m[1]
  pi = m.post_match
  until pi[-1] == ?? && @src.close_tag
    more = @src.get_plain
    unless more
      parse_error "unterminated PI meets EOF"
      return on_pi(target, pi)
    end
    pi << more
  end
  pi.chop! # remove last `?'
  on_pi(target, pi)
end
scan_prolog(s)
click to toggle source
# File lib/xmlscan/scanner.rb 1063 def scan_prolog(s) 1064 if /\A<\?xml(?=[ \t\n\r])/ =~ s then 1065 scan_xmldecl $' 1066 s = @src.get 1067 end 1068 doctype = true 1069 src = @src # for speed 1070 while s 1071 if s[0] == ?< then 1072 if (c = s[1]) == ?! then 1073 if s[2] == ?- and s[3] == ?- then 1074 scan_comment s 1075 elsif /\A<!DOCTYPE(?=[ \t\n\r])/ =~ s and doctype then 1076 doctype = false 1077 scan_doctype $' 1078 else 1079 break 1080 end 1081 elsif c == ?? then 1082 scan_pi s 1083 else 1084 break 1085 end 1086 s = src.get 1087 elsif /[^ \t\r\n]/ !~ s then 1088 on_prolog_space s unless s.empty? 1089 s = src.get_plain 1090 else 1091 break 1092 end 1093 end 1094 scan_content(s || src.get) 1095 end
scan_stag(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 761
# Scans a start tag (or empty-element tag); +s+ is the chunk starting with
# `<'.
#
# Events are emitted in this order: on_stag(name); for each attribute
# on_attribute(key), the value events produced by scan_attr_value, and
# on_attribute_end(key); finally on_stag_end or on_stag_end_empty with the
# name, the reconstructed tag text and a hash of attribute texts keyed by
# attribute name.
#
# Malformed input is reported through parse_error / wellformed_error and
# the found_* recovery methods. The code mutates +s+ and the scanned values
# in place and relies on $` / $', so statement order matters.
def scan_stag(s)
  hash = {}           # attribute name => collected attribute text
  orig = [s.dup]      # chunks making up the original tag text
  unless /(?=[\/ \t\n\r='"])/ =~ s then
    # No whitespace, `/', `=' or quote in the chunk: a tag with no attributes.
    name = s
    name[0,1] = ''        # remove `<'
    if name.empty? then
      if @src.close_tag then  # <>
        return found_empty_stag
      else                    # << or <[EOF]
        parse_error "parse error at `<'"
        return on_chardata '<'
      end
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name, orig*''+?>, {}
  else
    k = nil
    name = $`
    s = $'
    name[0,1] = ''        # remove `<'
    if name.empty? then    # `< tag' or `<=`
      parse_error "parse error at `<'"
      s << '>' if @src.close_tag
      return on_chardata '<' << s
    end
    on_stag name
    emptyelem = false
    begin
      continue = false
      # Each match is an attribute (key, val), a trailing `/' (all captures
      # nil), or anything else (error).
      s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/
             ) { |key,val,error|
        orig_val = []
        if key then
          on_attribute key
          k=key
          orig_val << val
          qmark = val.slice!(0,1)   # opening quotation mark
          if val[-1] == qmark[0] then
            # The value is closed within this chunk.
            val.chop!
            scan_attr_value val unless val.empty?
          else
            # The value continues in later chunks: read until the closing
            # quotation mark is found or the source runs out.
            scan_attr_value val unless val.empty?
            begin
              s = @src.get
              #STDERR << "get some more? #{s.inspect}, #{orig.inspect}\n"
              unless s then
                parse_error "unterminated attribute `#{key}' meets EOF"
                break
              end
              orig << s.dup
              c = s[0]
              val, s = s.split(qmark, 2)
              orig_val << val
              if c == ?< then
                wellformed_error "`<' is found in attribute `#{key}'"
              elsif c != ?> then
                #STDERR << "close in quote? #{c.inspect}, #{@src.tag_start?}, #{@src.tag_end?}, #{s.inspect}, #{val.inspect}, #{orig.inspect}, #{orig_val.inspect}\n"
                # Put a `>' in front of this chunk in both records and
                # report it as part of the attribute value.
                orig_val[-1,0] = orig[-1,0] = ?> # if @src.tag_start?
                scan_attr_value ?>
              end
              scan_attr_value val if c
            end until s
            continue = s      # if eof then continue is false, else true.
          end
          #STDERR << "attr:#{k}, #{orig_val}\n"
          hash[k] = orig_val*''
          #STDERR << "attr end #{hash.inspect}, #{k}, #{orig_val}\n"
          on_attribute_end key #, orig_val*''
        elsif error then
          # Report the offending token and rescan whatever follows it.
          continue = s = found_stag_error(error)
        else
          emptyelem = true
        end
      }
    end while continue
    unless @src.close_tag then
      if emptyelem then
        found_unclosed_emptyelem name
      else
        found_unclosed_stag name
      end
    end
    if emptyelem then
      on_stag_end_empty name, orig*''+?>, hash
    else
      #STDERR << "on stag end #{ name}, \"<#{name}#{s}>\", #{hash.inspect}\n"
      on_stag_end name, orig*''+?>, hash
      #on_stag_end name, "<#{name}#{s}>", hash
    end
  end
end
scan_xmldecl(s)
click to toggle source
# File lib/xmlscan/scanner.rb, line 906
# Scans an XML declaration; +s+ is the text following `<?xml'.
#
# Emits on_xmldecl, then on_xmldecl_key(key, value) for every `key = value'
# pair, then on_xmldecl_end. Pairs that are out of order or unknown are
# reported through parse_error but still delivered. `state' tracks how far
# the declaration has got: 0 nothing yet, 1 version seen, 2 encoding seen
# (or skipped), 3 standalone seen.
#
# The error for an end token that does not close the declaration names the
# offending token (+endtok+); the previous code interpolated +endmark+,
# which is never the token text.
def scan_xmldecl(s)
  endmark = nil
  state = 0
  on_xmldecl
  begin
    continue = false
    s.scan(XMLDeclPattern[@optkey]) { |key,val,endtok,error|
      if key then
        qmark = val.slice!(0,1)   # remove quotation marks
        if val[-1] == qmark[0] then
          val.chop!
        else
          # The value continues in later chunks of the source.
          continue = s = get_until_qmark(val, qmark)
          unless s then
            parse_error "unterminated XML declaration meets EOF"
            endmark = true
          end
        end
        newstate = case state
                   when 0; key == 'version' ? 1 : 4
                   when 1; key == 'encoding' ? 2 : key == 'standalone' ? 3 : 4
                   else    key == 'standalone' ? 3 : 4
                   end
        state = if newstate == 4
                  # 4 marks a key that is not allowed at this position.
                  known=%w{version encoding standalone}.member?(key)
                  parse_error(known ? "#{key} declaration must not be here" :
                                      "unknown declaration `#{key}' in XML declaration")
                  state < 2 ? 2 : 3
                else newstate end
        on_xmldecl_key key, val
      elsif endtok then
        endmark = if @src.close_tag
                    true
                  else
                    parse_error "unexpected `#{endtok}' found in XML declaration"
                    nil
                  end
        # here always exit the loop.
      else
        parse_error "parse error at `#{error}'"
      end
    }
  end while !endmark and continue || s = @src.get_plain
  parse_error "unterminated XML declaration meets EOF" unless s or endmark
  parse_error "no declaration found in XML declaration" if state == 0
  on_xmldecl_end
end
skip_internal_dtd(s)
click to toggle source
# File lib/xmlscan/scanner.rb 958 def skip_internal_dtd(s) 959 quote = nil 960 continue = true 961 begin # skip until `]>' 962 s.scan(SkipDTD[@optkey]) { |q,| #' 963 if quote then 964 quote = nil if quote == q and quote.size == 1 || @src.tag_end? 965 elsif q then 966 if q == '<!--' then 967 quote = '--' 968 elsif q == '<?' then 969 quote = '?' 970 elsif q == '"' or q == "'" then 971 quote = q 972 end 973 elsif @src.close_tag then 974 continue = false 975 end 976 } 977 end while continue and s = @src.get 978 parse_error "unterminated internal DTD subset meets EOF" unless s 979 end
valid_error(msg)
click to toggle source
# File lib/xmlscan/scanner.rb, line 345
# Reports a validity error message to the visitor.
def valid_error(msg)
  @visitor.valid_error(msg)
end
warning(msg)
click to toggle source
# File lib/xmlscan/scanner.rb, line 349
# Reports a warning message to the visitor.
def warning(msg)
  @visitor.warning(msg)
end
wellformed_error(msg)
click to toggle source
# File lib/xmlscan/scanner.rb, line 341
# Reports a well-formedness error message to the visitor.
def wellformed_error(msg)
  @visitor.wellformed_error(msg)
end