class XMLScan::XMLScanner

Constants

CDATAPattern
DoctypePattern
E_OPT_EXAMPLE
InvalidEntityRef
S_OPT_EXAMPLE
SkipDTD
XMLDeclPattern

Attributes

optkey[RW]

Public Class Methods

apply_option(instance, option) click to toggle source
    # File lib/xmlscan/scanner.rb
# Applies one named option to +instance+ by calling its
# +apply_option_<option>+ method.
#
# instance:: object being configured
# option::   option name; interpolated into the handler method name
#
# Returns whatever the handler method returns.
def apply_option(instance, option)
  handler = "apply_option_#{option}"
  instance.__send__(handler)
end
new(visitor, *options) click to toggle source
Calls superclass method
    # File lib/xmlscan/scanner.rb
# Creates an instance through the superclass constructor and then applies
# every requested option to it.
#
# visitor:: object handed to the superclass +new+
# options:: option names passed on to +apply_options+
#
# Returns the result of +apply_options+.
def new(visitor, *options)
  apply_options(super(visitor), options)
end
new(visitor) click to toggle source
    # File lib/xmlscan/scanner.rb
# Stores the visitor and resets the decoration, source and option key to nil.
#
# visitor:: object that receives the scanner's callbacks
def initialize(visitor)
  @visitor, @decoration, @src, @optkey = visitor, nil, nil, nil
end
provided_options() click to toggle source
    # File lib/xmlscan/scanner.rb
# Lists the option names this class provides.
#
# An option exists for every private instance method whose name starts with
# +apply_option_+; the option name is the remainder of the method name.
#
# Returns an Array of Strings.
def provided_options
  private_instance_methods.each_with_object([]) do |meth, found|
    matched = /\Aapply_option_/.match(meth)
    found.push(matched.post_match) if matched
  end
end

Private Class Methods

apply_options(instance, options) click to toggle source
    # File lib/xmlscan/scanner.rb
# Applies each requested option to +instance+.
#
# Walks the ancestor chain; every ancestor that responds to
# +provided_options+ gets to apply the requested options it provides.
# An option is removed from the pending set as soon as it is applied.
#
# instance:: object being configured
# options::  enumerable of option names (compared by their +to_s+ form)
#
# Returns +instance+.
# Raises ArgumentError when a requested option is provided by no ancestor.
def apply_options(instance, options)
  pending = {}
  options.each { |opt| pending[opt.to_s] = true }

  ancestors.each do |klass|
    next unless klass.respond_to?(:provided_options)

    klass.provided_options.each do |opt|
      next unless pending.include?(opt)

      pending.delete(opt)
      klass.apply_option(instance, opt)
    end
  end

  raise ArgumentError, "undefined option `#{pending.keys[0]}'" unless pending.empty?

  instance
end

Public Instance Methods

lineno() click to toggle source
    # File lib/xmlscan/scanner.rb
# Line number reported by the current source.
#
# Returns +@src+ itself (nil) when no source is set, otherwise +@src.lineno+.
def lineno
  return @src unless @src

  @src.lineno
end
opt_encoding() click to toggle source
    # File lib/xmlscan/scanner.rb
# Looks up the entry of +OptRegexp::RE_ENCODINGS+ for the current +optkey+.
def opt_encoding
  OptRegexp::RE_ENCODINGS[optkey]
end
parse(src)
Alias for: parse_document
parse_document(src) click to toggle source
     # File lib/xmlscan/scanner.rb
# Parses a whole document.
#
# Wraps +src+ with +make_source+, runs +scan_document+ over it, and always
# clears +@src+ afterwards, even when scanning raises.
#
# src:: input handed to +make_source+
#
# Returns +self+.
def parse_document(src)
  @src = make_source(src)
  begin
    scan_document()
  ensure
    # Drop the source whether or not scanning succeeded.
    @src = nil
  end
  self
end
Also aliased as: parse
path() click to toggle source
    # File lib/xmlscan/scanner.rb
# Path reported by the current source.
#
# Returns +@src+ itself (nil) when no source is set, otherwise +@src.path+.
def path
  return @src unless @src

  @src.path
end
source() click to toggle source
    # File lib/xmlscan/scanner.rb
# Underlying source reported by the current source wrapper.
#
# Guards against a missing +@src+ the same way +lineno+ and +path+ do, so
# calling this outside of a parse yields nil instead of raising
# NoMethodError on nil.
#
# Returns nil when no source is set, otherwise +@src.source+.
def source
  @src && @src.source
end

Private Instance Methods

apply_option_strict_char() click to toggle source
    # File lib/xmlscan/xmlchar.rb
# Handler for the +strict_char+ option: extends this object with the
# +StrictChar+ module.
def apply_option_strict_char
  extend(StrictChar)
end
decorate(decoration) click to toggle source
    # File lib/xmlscan/scanner.rb
# Adds +decoration+ to the visitor's decoration wrapper.
#
# On first use the current visitor is wrapped in a +Decoration+, which then
# replaces it as +@visitor+; later calls reuse that wrapper.
#
# Returns the result of +@decoration.expand+.
def decorate(decoration)
  if !@decoration
    wrapper = Decoration.new(@visitor)
    @decoration = wrapper
    @visitor = wrapper
  end
  @decoration.expand(decoration)
end
found_empty_etag() click to toggle source
    # File lib/xmlscan/scanner.rb
# Handles an empty end tag: reports a parse error and emits the literal
# text '</>' as character data.
def found_empty_etag
  parse_error("parse error at `</'")
  on_chardata('</>')
end
found_empty_stag() click to toggle source
    # File lib/xmlscan/scanner.rb
# Handles an empty start tag: reports a parse error and emits the literal
# text '<>' as character data.
def found_empty_stag
  parse_error("parse error at `<'")
  on_chardata('<>')
end
found_invalid_pubsys(pubsys) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports that a keyword other than PUBLIC or SYSTEM was found.
#
# pubsys:: the offending keyword (not used in the message)
#
# Returns 'SYSTEM' as the substitute keyword.
def found_invalid_pubsys(pubsys)
  parse_error("`PUBLIC' or `SYSTEM' should be here")
  'SYSTEM'
end
found_stag_error(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a parse error at the start of +s+ and returns the text after the
# offending token.
#
# The token is either a single leading '/', '=', quote character, or
# everything up to the next whitespace, '/', '=' or quote. When no such
# delimiter exists the whole string is the token.
#
# Returns the remaining text, or nil when the whole string was the token.
def found_stag_error(s)
  if (m = /\A[\/='"]/.match(s))
    tok = m[0]
    rest = m.post_match
  elsif (m = /(?=[ \t\n\r\/='"])/.match(s))
    tok = m.pre_match
    rest = m.post_match
  else
    tok = s
    rest = nil
  end
  parse_error("parse error at `#{tok}'")
  rest
end
found_unclosed_emptyelem(name) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports an empty-element tag that was never closed; the message depends on
# whether the source is positioned at the start of another tag.
def found_unclosed_emptyelem(name)
  message = if @src.tag_start?
              "unclosed empty element tag `#{name}' meets another tag"
            else
              "unclosed empty element tag `#{name}' meets EOF"
            end
  parse_error(message)
end
found_unclosed_etag(name) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports an end tag that was never closed; the message depends on whether
# the source is positioned at the start of another tag.
def found_unclosed_etag(name)
  message = if @src.tag_start?
              "unclosed end tag `#{name}' meets another tag"
            else
              "unclosed end tag `#{name}' meets EOF"
            end
  parse_error(message)
end
found_unclosed_stag(name) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a start tag that was never closed; the message depends on whether
# the source is positioned at the start of another tag.
def found_unclosed_stag(name)
  message = if @src.tag_start?
              "unclosed start tag `#{name}' meets another tag"
            else
              "unclosed start tag `#{name}' meets EOF"
            end
  parse_error(message)
end
get_until_qmark(str, qmark) click to toggle source
    # File lib/xmlscan/scanner.rb
# Keeps reading chunks from the source until one contains +qmark+,
# appending the text before the mark to +str+.
#
# A '>' is appended to +str+ before each chunk unless the chunk begins with
# '<' or '>'.
#
# str::   buffer that is appended to in place
# qmark:: the quotation mark to look for
#
# Returns the text following the mark, or nil when the source runs out.
def get_until_qmark(str, qmark)
  rest = nil
  until rest
    chunk = @src.get
    return chunk unless chunk

    lead = chunk[0]
    value, rest = chunk.split(qmark, 2)
    str << '>' if lead != ?< && lead != ?>
    str << value if lead
  end
  rest
end
make_source(src) click to toggle source
     # File lib/xmlscan/scanner.rb
# Wraps the raw input in a +Source+ object.
def make_source(src)
  Source.new(src)
end
on_attr_charref(code, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a decimal character reference found inside an attribute value to
# the visitor, together with any extra arguments.
def on_attr_charref(code, *a)
  @visitor.on_attr_charref(code, *a)
end
on_attr_charref_hex(code, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a hexadecimal character reference found inside an attribute value
# to the visitor, together with any extra arguments.
def on_attr_charref_hex(code, *a)
  @visitor.on_attr_charref_hex(code, *a)
end
on_attr_entityref(ref, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards an entity reference found inside an attribute value to the
# visitor, together with any extra arguments.
def on_attr_entityref(ref, *a)
  @visitor.on_attr_entityref(ref, *a)
end
on_attr_value(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a piece of literal attribute value text to the visitor, together
# with any extra arguments.
def on_attr_value(str, *a)
  @visitor.on_attr_value(str, *a)
end
on_attribute(name, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the start of an attribute to the visitor, together with any extra
# arguments.
def on_attribute(name, *a)
  @visitor.on_attribute(name, *a)
end
on_attribute_end(name, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the end of an attribute to the visitor, together with any extra
# arguments.
#
# The extra arguments are passed through exactly once; splatting them twice
# would hand the visitor a duplicated argument list.
def on_attribute_end(name, *a)
  @visitor.on_attribute_end(name, *a)
end
on_cdata(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the content of a CDATA section to the visitor, together with any
# extra arguments.
def on_cdata(str, *a)
  @visitor.on_cdata(str, *a)
end
on_chardata(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards character data to the visitor, together with any extra arguments.
def on_chardata(str, *a)
  @visitor.on_chardata(str, *a)
end
on_charref(code, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a decimal character reference to the visitor, together with any
# extra arguments.
def on_charref(code, *a)
  @visitor.on_charref(code, *a)
end
on_charref_hex(code, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a hexadecimal character reference to the visitor, together with
# any extra arguments.
def on_charref_hex(code, *a)
  @visitor.on_charref_hex(code, *a)
end
on_comment(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the text of a comment to the visitor, together with any extra
# arguments.
def on_comment(str, *a)
  @visitor.on_comment(str, *a)
end
on_doctype(root, pubid, sysid, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a document type declaration (root name, public ID, system ID) to
# the visitor, together with any extra arguments.
def on_doctype(root, pubid, sysid, *a)
  @visitor.on_doctype(root, pubid, sysid, *a)
end
on_end_document(*a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Tells the visitor that the document has ended, passing along all given
# arguments.
def on_end_document(*a)
  @visitor.on_end_document(*a)
end
on_entityref(ref, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards an entity reference to the visitor, together with any extra
# arguments.
def on_entityref(ref, *a)
  @visitor.on_entityref(ref, *a)
end
on_etag(name, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards an end tag to the visitor, together with any extra arguments.
def on_etag(name, *a)
  @visitor.on_etag(name, *a)
end
on_pi(target, pi, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards a processing instruction (target and content) to the visitor,
# together with any extra arguments.
def on_pi(target, pi, *a)
  @visitor.on_pi(target, pi, *a)
end
on_prolog_space(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards whitespace found in the prolog to the visitor, together with any
# extra arguments.
def on_prolog_space(str, *a)
  @visitor.on_prolog_space(str, *a)
end
on_stag(name, *a) click to toggle source

<hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
^     ^     ^  ^    ^    ^     ^  ^  ^ ^
1     2     3  4    5    6     7  8  9 A

The following methods will be called, with the following arguments, as the parser reaches each of the points marked above:

1: on_stag              ('hoge')
2: on_attribute         ('fuga')
3: on_attr_value        ('foo')
4: on_attr_entityref    ('bar')
5: on_attr_charref      (38)
6: on_attr_charref_hex  (38)
7: on_attr_value        ('foo')
8: on_attribute_end     ('fuga')
9: on_stag_end_empty    ('hoge')
     or
   on_stag_end          ('hoge')

A: on_chardata          ('HOGE')
    # File lib/xmlscan/scanner.rb
# Forwards the start of a start tag to the visitor, together with any extra
# arguments.
def on_stag(name, *a)
  @visitor.on_stag(name, *a)
end
on_stag_end(name, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the end of a (non-empty) start tag to the visitor, together with
# any extra arguments.
def on_stag_end(name, *a)
  @visitor.on_stag_end(name, *a)
end
on_stag_end_empty(name, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the end of an empty-element tag to the visitor, together with any
# extra arguments.
def on_stag_end_empty(name, *a)
  @visitor.on_stag_end_empty(name, *a)
end
on_start_document(*a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Tells the visitor that the document has started, passing along all given
# arguments.
def on_start_document(*a)
  @visitor.on_start_document(*a)
end
on_xmldecl() click to toggle source
    # File lib/xmlscan/scanner.rb
# Tells the visitor that an XML declaration has started.
def on_xmldecl
  @visitor.on_xmldecl()
end
on_xmldecl_encoding(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the encoding given in the XML declaration to the visitor,
# together with any extra arguments.
def on_xmldecl_encoding(str, *a)
  @visitor.on_xmldecl_encoding(str, *a)
end
on_xmldecl_end(*a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Tells the visitor that the XML declaration has ended, passing along all
# given arguments.
def on_xmldecl_end(*a)
  @visitor.on_xmldecl_end(*a)
end
on_xmldecl_key(key, str) click to toggle source
    # File lib/xmlscan/scanner.rb
# Dispatches one key/value pair of the XML declaration.
#
# When the visitor responds to +on_xmldecl_<key>+, the scanner's own method
# of that name is invoked with the value; otherwise the pair is reported
# through +on_xmldecl_other+.
def on_xmldecl_key(key, str)
  handler = "on_xmldecl_#{key}"
  return send(handler, str) if @visitor.respond_to?(handler)

  send(:on_xmldecl_other, key, str)
end
on_xmldecl_other(name, value, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards an XML declaration pair that has no dedicated handler to the
# visitor, together with any extra arguments.
def on_xmldecl_other(name, value, *a)
  @visitor.on_xmldecl_other(name, value, *a)
end
on_xmldecl_standalone(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the standalone value given in the XML declaration to the visitor,
# together with any extra arguments.
def on_xmldecl_standalone(str, *a)
  @visitor.on_xmldecl_standalone(str, *a)
end
on_xmldecl_version(str, *a) click to toggle source
    # File lib/xmlscan/scanner.rb
# Forwards the version given in the XML declaration to the visitor, together
# with any extra arguments.
def on_xmldecl_version(str, *a)
  @visitor.on_xmldecl_version(str, *a)
end
parse_error(msg) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a parse error message to the visitor.
def parse_error(msg)
  @visitor.parse_error(msg)
end
scan_attr_value(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans the text of an attribute value and reports its pieces.
#
# Text without any `&' is reported as a single on_attr_value. Otherwise the
# text before the first `&' is reported, and each following `&'-delimited
# piece is reported as an entity reference, a decimal or hexadecimal
# character reference, or (when malformed) a parse error, followed by the
# literal text that trails the reference.
#
# s:: attribute value text
573 def scan_attr_value(s)     # almost a copy & paste of scan_chardata
574   unless /&/ =~ s then
575     #STDERR << "no& attr_val #{s.inspect}, #{caller*"\n"}\n" if s == ?>
576     on_attr_value s
577   else
        # Literal text in front of the first `&'.
578     s = $`
579     on_attr_value s unless s.empty?
580     ref = nil
        # $' is still the text after the first `&'; each piece starts right
        # after an `&'.
581     $'.split('&', -1).each { |s|
          # A reference must end at a `;' that is not the first character.
          # The whitespace alternative matches an empty string, which the
          # `$&.empty?' test rejects.
582       unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then
583         if InvalidEntityRef[@optkey] =~ s and not (ref = $`).strip.empty?
584         then
              # Looks like a reference but lacks the `;'. Execution falls
              # through and uses the match data of InvalidEntityRef.
585           parse_error "reference to `#{ref}' doesn't end with `;'"
586         else
              # A bare `&': report it back as literal text.
587           parse_error "`&' is not used for entity/character references"
588           on_attr_value('&' << s)
589           next
590         end
591       end
          # Rebuild the reference text as `&' + name + `;'.
592       orig = ?& + (ref = $`) + ?;
593       s = $'
594       if /\A[^#]/ =~ ref then
595         on_attr_entityref ref, orig
596       elsif /\A#(\d+)\z/ =~ ref then
597         on_attr_charref $1.to_i, orig
598       elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
599         on_attr_charref_hex $1.hex, orig
600       else
601         parse_error "invalid character reference `#{ref}'"
602       end
          # Literal text that follows the reference.
603       on_attr_value s unless s.empty?
604     }
605   end
606 end
scan_bang_tag(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Handles a `<!' construct that is neither a comment nor a CDATA section:
# reports a parse error and emits the text as character data, with '>'
# appended when the source could close the tag.
def scan_bang_tag(s)
  parse_error("parse error at `<!'")
  closed = @src.close_tag
  s << '>' if closed
  on_chardata(s)
end
scan_cdata(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans a CDATA section whose opening marker has already been removed.
#
# Text is accumulated (in place, into +s+) until it matches the CDATA
# pattern for the current option key and the source can close the tag; the
# trailing `]]' is then removed and the content reported via on_cdata.
# Reaching end of input first reports a parse error and emits what was
# collected so far.
def scan_cdata(s)
  buffer = s
  terminator = CDATAPattern[@optkey]
  until (terminator =~ buffer) && @src.close_tag
    more = @src.get_plain
    if !more
      parse_error("unterminated CDATA section meets EOF")
      return on_cdata(buffer)
    end
    buffer << more
  end
  buffer.chop!.chop!  # drop the trailing ']]'
  on_cdata(buffer)
end
scan_chardata(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans character data and reports its pieces, then keeps going with
# following text chunks from the source until none is left.
#
# Text without any `&' is reported as a single on_chardata. Otherwise the
# text before the first `&' is reported, and each following `&'-delimited
# piece is reported as an entity reference, a decimal or hexadecimal
# character reference, or (when malformed) a parse error, followed by the
# literal text that trails the reference.
#
# s:: first chunk of character data
532 def scan_chardata(s)
533   while true
534     unless /&/ =~ s then
535       on_chardata s
536     else
          # Literal text in front of the first `&'.
537       s = $`
538       on_chardata s unless s.empty?
539       #orig = $'.sub(/(?=;).*$/,'')
540       ref = nil
          # $' is still the text after the first `&'; each piece starts
          # right after an `&'.
541       $'.split('&', -1).each { |s|
            # A reference must end at a `;' that is not the first character.
            # The whitespace alternative matches an empty string, which the
            # `$&.empty?' test rejects.
542         unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then
543           if InvalidEntityRef[@optkey] =~ s and not (ref = $`).strip.empty?
544           then
                # Looks like a reference but lacks the `;'. Execution falls
                # through and uses the match data of InvalidEntityRef.
545             parse_error "reference to `#{ref}' doesn't end with `;'"
546           else
                # A bare `&': report it back as literal text.
547             parse_error "`&' is not used for entity/character references"
548             on_chardata '&'+s
549             next
550           end
551         end
            # Rebuild the reference text as `&' + name + `;'.
552         orig = ?& + (ref = $`) + ?;
553         s = $'
554         if /\A[^#]/ =~ ref then
555           on_entityref ref, orig
556         elsif /\A#(\d+)\z/ =~ ref then
557           on_charref $1.to_i, orig
558         elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
559           on_charref_hex $1.hex, orig
560         else
561           parse_error "invalid character reference `#{ref}'"
562         end
            # Literal text that follows the reference.
563         on_chardata s unless s.empty?
564       }
565     end
        # Fetch the next text chunk; stop when the source has none.
566     s = @src.get_text
567     break unless s
        # Prefix the chunk with '>' unless it is exactly '>'.
568     s = '>' << s unless s == '>'
569   end
570 end
scan_comment(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans a comment and reports its text through on_comment.
#
# s:: text starting with `<!--'; the marker is removed in place
#
# Reports parse errors for `--' inside the comment, for a `--->' ending and
# for a comment that is still open at end of input; in the last case the
# text collected so far is still reported.
609 def scan_comment(s)
610   s[0,4] = ''  # remove `<!--'
611   comm = ''
      # Collect text until the first `--' shows up.
612   until /--/ =~ s
613     comm << s
614     s = @src.get_plain
615     unless s then
616       parse_error "unterminated comment meets EOF"
617       return on_comment(comm)
618     end
619   end
620   comm << $`
      # The comment ends when nothing follows the `--' and the tag can be
      # closed. $' refers to the most recent /--/ match.
621   until (s = $').empty? and @src.close_tag
622     if s == '-' and @src.close_tag then      # --->
623       parse_error "comment ending in `--->' is not allowed"
624       comm << s
625       break
626     end
        # The `--' was not the terminator: keep it as comment text and look
        # for the next one.
627     parse_error "comment includes `--'"
628     comm << '--'
629     until /--/ =~ s     # copy & paste for performance
630       comm << s
631       s = @src.get_plain
632       unless s then
633         parse_error "unterminated comment meets EOF"
634         return on_comment(comm)
635       end
636     end
637     comm << $`
638   end
639   on_comment comm
640 end
scan_content(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans document content chunk by chunk until the source is exhausted.
#
# Each chunk is dispatched on its leading characters: `</' to scan_etag,
# `<!--' to scan_comment, `<![CDATA[' to scan_cdata (with the marker
# removed), any other `<!' to scan_bang_tag, `<?' to scan_pi, any other `<'
# to scan_stag, and everything else to scan_chardata.
#
# s:: first chunk, or nil when there is nothing to scan
def scan_content(s)
  src = @src  # local copy, kept from the original "for speed"
  while s
    if s[0] == ?<
      case s[1]
      when ?/
        scan_etag(s)
      when ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment(s)
        elsif /\A<!\[CDATA\[/ =~ s
          scan_cdata($')
        else
          scan_bang_tag(s)
        end
      when ??
        scan_pi(s)
      else
        scan_stag(s)
      end
    else
      scan_chardata(s)
    end
    s = src.get
  end
end
scan_doctype(s) click to toggle source
     # File lib/xmlscan/scanner.rb
 # Scans a DOCTYPE declaration (the text following `<!DOCTYPE') and reports
 # it through on_doctype as root name, public ID and system ID.
 #
 # The text is consumed token by token with the DOCTYPE pattern selected by
 # the current option key. Like every other pattern table in this class, the
 # table is indexed with @optkey; the former @opt was never assigned, so the
 # lookup ignored the configured key.
 #
 # s:: text following `<!DOCTYPE'
 #
 # Parse errors are reported for unexpected tokens, for a wrong number of
 # external ID literals, for a missing root element and for a declaration
 # that is still open at end of input. When an internal subset (`[') is
 # found, the remaining text is handed to scan_internal_dtd after
 # on_doctype has been called.
 def scan_doctype(s)
   root = syspub = sysid = pubid = nil
   internal_dtd = false
   re = DoctypePattern[@optkey]
   begin
     if re =~ s then
       name, str, delim, s = $1, $2, $3, $'
       if name then
         if not root then
           root = name
         elsif not syspub then
           unless name == 'PUBLIC' or name == 'SYSTEM' then
             name = found_invalid_pubsys(name)
           end
           syspub = name
         else
           parse_error "parse error at `#{name}'"
         end
       elsif str then
         qmark = str.slice!(0,1)     # remove quotation marks
         unless syspub then
           # A literal before PUBLIC/SYSTEM: report it and rescan its body.
           parse_error "parse error at `#{qmark}'"
           s = str << s
         else
           if str[-1] == qmark[0] then
             str.chop!
           else
             # The literal is not closed in this chunk; read on.
             s = get_until_qmark(str, qmark) || ''
           end
           if not sysid then
             sysid = str
           elsif not pubid and syspub == 'PUBLIC' then
             # Second literal after PUBLIC: the first one was the public ID.
             pubid = sysid
             sysid = str
           else
             parse_error "too many external ID literals in DOCTYPE"
           end
         end
       elsif delim == '[' then
         internal_dtd = true
         break
       else
         parse_error "parse error at `#{delim}'"
       end
     else
       s = ''
     end
     if s.empty? then
       break if @src.close_tag
       s = @src.get_plain
     end
   end while s
   parse_error "unterminated DOCTYPE declaration meets EOF" unless s
   unless root then
     parse_error "no root element is specified in DOCTYPE"
   end
   if syspub and not sysid then
     parse_error "too few external ID literals in DOCTYPE"
   end
   if syspub == 'PUBLIC' and not pubid then
     # Only one literal followed PUBLIC: it is the public ID.
     pubid, sysid = sysid, nil
   end
   on_doctype root, pubid, sysid
   scan_internal_dtd s if internal_dtd
 end
scan_document() click to toggle source
     # File lib/xmlscan/scanner.rb
# Scans the whole document: signals the start, prepares the source, scans
# from the first chunk onwards, then signals the end.
#
# Returns the result of on_end_document.
def scan_document
  on_start_document('')
  @src.prepare
  first_chunk = @src.get
  scan_prolog(first_chunk)
  on_end_document('')
end
scan_etag(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans an end tag and reports it through on_etag with the tag name and the
# original tag text.
#
# s:: text starting with `</'; the marker is removed in place
#
# Malformed end tags (`</>', `</' followed by nothing or by whitespace) are
# reported as parse errors and emitted as character data instead.
697 def scan_etag(s)
      # Original tag text, built before the marker is stripped.
698   orig="#{s}>"
699   s[0,2] = ''  # remove '</'
700   if s.empty? then
701     if @src.close_tag then   # </>
702       return found_empty_etag
703     else                     # </< or </[EOF]
704       parse_error "parse error at `</'"
705       s << '>' if @src.close_tag
706       return on_chardata '</' << s
707     end
708   elsif /[ \t\n\r]+/ =~ s then
        # s1 is the text before the first run of whitespace, s2 the text
        # after it.
709     s1, s2 = $`, $'
710     if s1.empty? then                # </ tag
711       parse_error "parse error at `</'"
712       s << '>' if @src.close_tag
713       return on_chardata '</' + s
714     elsif not s2.empty? then         # </ta g
715       parse_error "illegal whitespace is found within end tag `#{s1}'"
          # Consume chunks until @src.get_tag returns nil/false.
716       while @src.get_tag
717       end
718     end
        # Trailing whitespace only: the name is the part before it.
719     s = s1
720   end
721   found_unclosed_etag s unless @src.close_tag   # </tag< or </tag[EOF]
722   on_etag s, orig
723 end
scan_internal_dtd(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Handles an internal DTD subset: warns that it is not supported and skips
# over it.
def scan_internal_dtd(s)
  warning("internal DTD subset is not supported")
  skip_internal_dtd(s)
end
scan_pi(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans a processing instruction and reports it through on_pi as target and
# content.
#
# s:: text starting with `<?'
#
# Text that has no valid target after `<?' is reported as a parse error and
# emitted as character data. Content is accumulated until it ends with `?'
# and the source can close the tag; reaching end of input first reports a
# parse error and emits what was collected so far.
def scan_pi(s)
  head = /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/.match(s)
  unless head
    parse_error("parse error at `<?'")
    s << '>' if @src.close_tag
    return on_chardata(s)
  end

  target = head[1]
  body = head.post_match
  until body[-1] == ?? && @src.close_tag
    chunk = @src.get_plain
    unless chunk
      parse_error("unterminated PI meets EOF")
      return on_pi(target, body)
    end
    body << chunk
  end
  body.chop!  # drop the trailing `?'
  on_pi(target, body)
end
scan_prolog(s) click to toggle source
     # File lib/xmlscan/scanner.rb
# Scans the document prolog and then hands over to scan_content.
#
# An XML declaration at the very start is scanned first. After that,
# comments, processing instructions, whitespace-only text and at most one
# DOCTYPE declaration are consumed. The first chunk that is none of these
# ends the prolog and becomes the first chunk of the content.
#
# s:: first chunk of the document, or nil
def scan_prolog(s)
  if /\A<\?xml(?=[ \t\n\r])/ =~ s
    scan_xmldecl($')
    s = @src.get
  end
  doctype_allowed = true
  src = @src  # local copy, kept from the original "for speed"
  while s
    if s[0] == ?<
      marker = s[1]
      if marker == ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment(s)
        elsif /\A<!DOCTYPE(?=[ \t\n\r])/ =~ s && doctype_allowed
          doctype_allowed = false
          scan_doctype($')
        else
          break
        end
      elsif marker == ??
        scan_pi(s)
      else
        break
      end
      s = src.get
    elsif /[^ \t\r\n]/ !~ s
      # Whitespace-only text between prolog items.
      on_prolog_space(s) unless s.empty?
      s = src.get_plain
    else
      break
    end
  end
  scan_content(s || src.get)
end
scan_stag(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans a start tag (or empty-element tag) and reports it to the handlers.
#
# s:: text starting with `<'
#
# Emits on_stag for the element name, then on_attribute / attribute value
# events / on_attribute_end for each attribute, and finally on_stag_end or
# on_stag_end_empty. The closing event receives the element name, the tag
# text rebuilt from the consumed source chunks plus a trailing `>', and a
# hash of the value text collected for each attribute name.
#
# Malformed input (`<>', a `<' with no name, unterminated attribute values,
# unclosed tags) is reported through the error handlers.
761 def scan_stag(s)
762   hash = {}
      # Source chunks consumed for this tag; joined later to rebuild its text.
763   orig = [s.dup] 
      # No delimiter at all: the chunk is just `<' plus a name.
764   unless /(?=[\/ \t\n\r='"])/ =~ s then
765     name = s
766     name[0,1] = ''        # remove `<'
767     if name.empty? then
768       if @src.close_tag then   # <>
769         return found_empty_stag
770       else                     # << or <[EOF]
771         parse_error "parse error at `<'"
772         return on_chardata '<'
773       end
774     end
775     on_stag name
776     found_unclosed_stag name unless @src.close_tag
777     on_stag_end name, orig*''+?>, {}
778   else
779     k = nil
        # Name is the text before the first delimiter; s is the rest.
780     name = $`
781     s = $'
782     name[0,1] = ''        # remove `<'
783     if name.empty? then   # `< tag' or `<=`
784       parse_error "parse error at `<'"
785       s << '>' if @src.close_tag
786       return on_chardata '<' << s
787     end
788     on_stag name
789     emptyelem = false
        # Each pass scans one stretch of attribute text; `continue' holds the
        # text left over after an unterminated value or an error token, which
        # is scanned on the next pass.
790     begin
791       continue = false
          # Alternatives: key=value pair (the value may lack its closing
          # quote), a trailing `/', or anything else (captured as error).
792       s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/
793              ) { |key,val,error|
794         orig_val = []
795         if key then
796           on_attribute key
797           k=key
              # The same String object is stored and then stripped of its
              # quotation marks in place below.
798           orig_val << val
799           qmark = val.slice!(0,1)
800           if val[-1] == qmark[0] then
801             val.chop!
802             scan_attr_value val unless val.empty?
803           else
                # The value is not closed in this chunk: keep reading chunks
                # until one contains the closing quotation mark.
804             scan_attr_value val unless val.empty?
805             begin
806               s = @src.get
807               #STDERR << "get some more? #{s.inspect}, #{orig.inspect}\n"
808               unless s then
809                 parse_error "unterminated attribute `#{key}' meets EOF"
810                 break
811               end
812               orig << s.dup
813               c = s[0]
814               val, s = s.split(qmark, 2)
815               orig_val << val
816               if c == ?< then
817                 wellformed_error "`<' is found in attribute `#{key}'"
818               elsif c != ?> then
819                 #STDERR << "close in quote? #{c.inspect}, #{@src.tag_start?}, #{@src.tag_end?}, #{s.inspect}, #{val.inspect}, #{orig.inspect}, #{orig_val.inspect}\n"
                    # Insert a `>' in front of the chunk just added, in both
                    # the tag text and the value pieces, and report it as
                    # value text.
820                 orig_val[-1,0] = orig[-1,0] = ?> # if @src.tag_start?
821                 scan_attr_value ?>
822               end
823               scan_attr_value val if c
824             end until s
825             continue = s      # if eof then continue is false, else true.
826           end
827           #STDERR << "attr:#{k}, #{orig_val}\n"
828           hash[k] = orig_val*''
829           #STDERR << "attr end #{hash.inspect}, #{k}, #{orig_val}\n"
830           on_attribute_end key #, orig_val*''
831         elsif error then
832           continue = s = found_stag_error(error)
833         else
              # Neither key nor error captured: the `/' alternative matched.
834           emptyelem = true
835         end
836       }
837     end while continue
838     unless @src.close_tag then
839       if emptyelem then
840         found_unclosed_emptyelem name
841       else
842         found_unclosed_stag name
843       end
844     end
845     if emptyelem then
846       on_stag_end_empty name, orig*''+?>, hash
847     else
848       #STDERR << "on stag end #{ name}, \"<#{name}#{s}>\", #{hash.inspect}\n"
849       on_stag_end name, orig*''+?>, hash
850       #on_stag_end name, "<#{name}#{s}>", hash
851     end
852   end
853 end
scan_xmldecl(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Scans an XML declaration (the text following `<?xml') and reports it
# through on_xmldecl, on_xmldecl_key for each key/value pair, and
# on_xmldecl_end.
#
# s:: text following `<?xml'
#
# +state+ tracks which declarations have been seen so far: 0 = none,
# 1 = version, 2 = encoding, 3 = standalone. A pair that is out of order or
# unknown (intermediate value 4) is reported as a parse error.
#
# Parse errors are also reported for unexpected tokens, for an end token
# that is not followed by the end of the tag, for a declaration that is
# still open at end of input, and for a declaration without any pair.
def scan_xmldecl(s)
  endmark = nil
  state = 0
  on_xmldecl
  begin
    continue = false
    s.scan(XMLDeclPattern[@optkey]) { |key,val,endtok,error|
      if key then
        qmark = val.slice!(0,1)     # remove quotation marks
        if val[-1] == qmark[0] then
          val.chop!
        else
          # The value is not closed in this chunk; read on. The text after
          # the closing mark is scanned on the next pass.
          continue = s = get_until_qmark(val, qmark)
          unless s then
            parse_error "unterminated XML declaration meets EOF"
            endmark = true
          end
        end
        newstate = case state
            when 0; key == 'version' ? 1 : 4
            when 1; key == 'encoding' ? 2 : key == 'standalone' ? 3 : 4
            else    key == 'standalone' ? 3 : 4
          end
        state = if newstate == 4
            known=%w{version encoding standalone}.member?(key)
            parse_error known ?  "#{key} declaration must not be here" :
                "unknown declaration `#{key}' in XML declaration"
            state < 2 ? 2 : 3
          else newstate end
        on_xmldecl_key key, val
      elsif endtok then
        if @src.close_tag then
          endmark = true
        else
          # Name the token that was actually found; endmark is still nil
          # here and would render as an empty string.
          parse_error "unexpected `#{endtok}' found in XML declaration"
          endmark = nil
        end
      else
        parse_error "parse error at `#{error}'"
      end
    }
  end while !endmark and continue || s = @src.get_plain
  parse_error "unterminated XML declaration meets EOF" unless s or endmark
  parse_error "no declaration found in XML declaration" if state == 0
  on_xmldecl_end
end
skip_internal_dtd(s) click to toggle source
    # File lib/xmlscan/scanner.rb
# Skips over an internal DTD subset without interpreting it.
#
# s:: text following the `[' that opens the subset
#
# Tokens matched by the SkipDTD pattern are tracked only to know whether the
# scanner is inside a quoted literal, a comment or a processing instruction.
# Skipping stops once the tag can be closed outside such a region. Running
# out of input first is reported as a parse error.
958 def skip_internal_dtd(s)
      # Closing token expected for the currently open region, or nil.
959   quote = nil
960   continue = true
961   begin                                         # skip until `]>'
962     s.scan(SkipDTD[@optkey]) { |q,|  #'
963       if quote then
            # Parsed as: quote == q and (quote.size == 1 || @src.tag_end?).
            # One-character closers end the region right away; longer ones
            # also need @src.tag_end?.
964         quote = nil if quote == q and quote.size == 1 || @src.tag_end?
965       elsif q then
966         if q == '<!--' then
967           quote = '--'
968         elsif q == '<?' then
969           quote = '?'
970         elsif q == '"' or q == "'" then
971           quote = q
972         end
          # A match without a captured token, outside any region: try to
          # close the declaration.
973       elsif @src.close_tag then
974         continue = false
975       end
976     }
977   end while continue and s = @src.get
978   parse_error "unterminated internal DTD subset meets EOF" unless s
979 end
valid_error(msg) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a validity error message to the visitor.
def valid_error(msg)
  @visitor.valid_error(msg)
end
warning(msg) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a warning message to the visitor.
def warning(msg)
  @visitor.warning(msg)
end
wellformed_error(msg) click to toggle source
    # File lib/xmlscan/scanner.rb
# Reports a well-formedness error message to the visitor.
def wellformed_error(msg)
  @visitor.wellformed_error(msg)
end