class XMLScan::XMLParser

Public Instance Methods

parse(*) click to toggle source

# Replacement text for the five entity names XML predefines
# (lt, gt, amp, quot, apos), keyed by entity name without the
# surrounding "&" and ";".
#
# The table is frozen so that entries cannot be added, removed or
# reassigned by accident. The value strings themselves are deliberately
# left unfrozen, so code that receives one of them behaves as before.
PredefinedEntity = {
  'lt'   => '<',
  'gt'   => '>',
  'amp'  => '&',
  'quot' => '"',
  'apos' => "'",
}.freeze

Calls superclass method
   # File lib/xmlscan/parser.rb
# Resets the per-document state kept by this parser and then delegates to
# the superclass +parse+, passing along whatever arguments were given.
#
# State reset here:
# * @standalone - set back to nil
# * @elem       - a fresh empty array (used as the stack of open element names)
# * @attr       - a fresh AttributeChecker
def parse(*)
  @standalone = nil
  @elem = []
  @attr = AttributeChecker.new
  super
end
scan_content(s) click to toggle source
    # File lib/xmlscan/parser.rb
# Scans document content piece by piece, starting with +s+ and then pulling
# each following piece from @src.get until it returns a false value.
#
# Every piece is dispatched on its first characters to one of the scan_*
# helpers (defined elsewhere): "</" -> scan_etag, "<!--" -> scan_comment,
# "<![CDATA[" -> scan_cdata, other "<!" -> scan_bang_tag, "<?" -> scan_pi,
# any other "<" -> scan_stag, anything else -> scan_chardata.
#
# The outer begin ... end until loop runs three phases in order:
# 1. everything up to and including the first start tag,
# 2. the content while at least one element is still open,
# 3. the epilogue after the root element has been closed.
# When the epilogue meets something that is not allowed there, the outer
# loop runs again with that same piece, so scanning continues after the error.
163 def scan_content(s)
164   elem = @elem  # for speed
165   src = @src  # for speed
      # Set once the first start tag has been seen; it is never reset, so the
      # "no root element" error below cannot fire on later passes of the outer loop.
166   found_root_element = false
167 
168   begin
169 
170     # -- first start tag --
171     elem.clear
172     found_stag = false
173 
      # Phase 1: consume pieces until a start tag has been scanned or input ends.
174     while s and not found_stag
175       if (c = s[0]) == ?< then
176         if (c = s[1]) == ?/ then
177           # should be a parse error
178           scan_etag s
179         elsif c == ?! then
180           if s[2] == ?- and s[3] == ?- then
181             scan_comment s
182           elsif /\A<!\[CDATA\[/n =~ s then
183             parse_error "CDATA section is found outside of root element"
                # $' is the text following the matched "<![CDATA[" prefix.
184             scan_cdata $'
185           else
186             scan_bang_tag s
187           end
188         elsif c == ?? then
189           scan_pi s
190         else
                # "<" not followed by "/", "!" or "?" is treated as a start tag.
191           found_root_element = true
192           found_stag = true
193           scan_stag s
194         end
195       else
196         parse_error "content of element is found outside of root element"
197         scan_chardata s
198       end
199       s = src.get
200     end
201 
202     if not found_root_element and not found_stag then
203       parse_error "no root element was found"
204     end
205 
206     # -- contents --
      # Phase 2: keep scanning while the element stack is non-empty.
      # NOTE(review): presumably scan_stag/scan_etag reach on_stag/on_etag,
      # which push to and pop from @elem -- confirm in the scanner superclass.
207     while s and not elem.empty?
208       if (c = s[0]) == ?< then
209         if (c = s[1]) == ?/ then
210           scan_etag s
211         elsif c == ?! then
212           if s[2] == ?- and s[3] == ?- then
213             scan_comment s
214           elsif /\A<!\[CDATA\[/n =~ s then
215             scan_cdata $'
216           else
217             scan_bang_tag s
218           end
219         elsif c == ?? then
220           scan_pi s
221         else
222           scan_stag s
223         end
224       else
225         scan_chardata s
226       end
227       s = src.get
228     end
229 
      # Still-open elements here mean the content loop stopped because input
      # ran out: report each one and send the visitor a closing on_etag for it.
230     unless elem.empty? then
231       while name = elem.pop
232         parse_error "unclosed element `#{name}' meets EOF"
233         @visitor.on_etag name
234       end
235     end
236 
237     # -- epilogue --
238     finish = true
239 
      # Phase 3: only comments, processing instructions and whitespace-only
      # text are consumed here. Anything else sets finish = false and breaks
      # WITHOUT fetching the next piece, so the outer loop starts over on the
      # same +s+.
240     while s
241       if (c = s[0]) == ?< then
242         if (c = s[1]) == ?/ then
243           finish = false    # content out of root element
244           break
245         elsif c == ?! then
246           if s[2] == ?- and s[3] == ?- then
247             scan_comment s
248           else
249             finish = false  # content out of root element
250             break
251           end
252         elsif c == ?? then
253           scan_pi s
254         else
255           parse_error "another root element is found"  # stag
256           finish = false
257           break
258         end
259       else
260         if s.strip.empty? then
261           on_prolog_space s
262         else
263           finish = false    # content out of root element
264           break
265         end
266       end
267       s = src.get
268     end
269 
270   end until finish
271 
272 end

Private Instance Methods

on_attr_value(str, *a) click to toggle source
    # File lib/xmlscan/parser.rb
# Normalizes the whitespace of an attribute value and hands the result to
# the visitor's +on_attr_value+.
#
# Every tab, carriage return and line feed in +str+ is replaced by a single
# space. A mutable +str+ is normalized in place, exactly as before. A frozen
# +str+ would make +tr!+ raise FrozenError, so in that case a normalized
# copy is passed on instead.
#
# str:: the attribute value text
# a::   extra arguments; accepted but not forwarded to the visitor
#
# Returns whatever the visitor's +on_attr_value+ returns.
def on_attr_value(str, *a)
  if str.frozen?
    str = str.tr("\t\r\n", ' ')   # normalize a copy
  else
    str.tr!("\t\r\n", ' ')        # normalize in place
  end
  @visitor.on_attr_value str
end
on_attribute(name, *a) click to toggle source
    # File lib/xmlscan/parser.rb
# Reports an attribute name to the visitor, first checking it for duplicates.
#
# If @attr.check_unique(name) returns a false value, a well-formedness error
# is raised through +wellformed_error+. The visitor's +on_attribute+ is then
# called with the name (if +wellformed_error+ returns).
#
# name:: the attribute name
# a::    extra arguments; accepted but not forwarded to the visitor
def on_attribute(name, *a)
  wellformed_error "doubled attribute `#{name}'" unless @attr.check_unique(name)
  @visitor.on_attribute(name)
end
on_doctype(name, pubid, sysid, *a) click to toggle source
   # File lib/xmlscan/parser.rb
# Checks the external ID of a document type declaration and forwards the
# declaration to the visitor's +on_doctype+.
#
# A public ID given without a system ID is reported through +parse_error+.
#
# name::  the name given in the declaration
# pubid:: the public ID, or a false value when absent
# sysid:: the system ID, or a false value when absent
# a::     extra arguments; accepted but not forwarded to the visitor
def on_doctype(name, pubid, sysid, *a)
  missing_sysid = pubid && !sysid
  parse_error "public external ID must have both public ID and system ID" if missing_sysid
  @visitor.on_doctype(name, pubid, sysid)
end
on_etag(name, *a) click to toggle source
    # File lib/xmlscan/parser.rb
# Handles an end tag by popping the innermost open element name off @elem
# and comparing it with +name+.
#
# * Names match: the visitor's +on_etag+ receives +name+ and the extra args.
# * An element was open but under a different name: a well-formedness error
#   is reported, then the visitor's +on_etag+ is called for the element that
#   was actually open.
# * Nothing was open: a parse error is reported and the visitor is not called.
#
# name:: the element name found in the end tag
# a::    extra arguments, forwarded to the visitor
def on_etag(name, *a)
  opened = @elem.pop
  return @visitor.on_etag(name, *a) if opened == name

  # A false value means the stack of open elements was already empty.
  return parse_error("end tag `#{name}' appears alone") unless opened

  wellformed_error "element type `#{name}' is not matched"
  @visitor.on_etag(opened, *a)
end
on_pi(target, pi, *a) click to toggle source
   # File lib/xmlscan/parser.rb
# Forwards a processing instruction to the visitor's +on_pi+.
#
# A target that equals "xml" in any letter case is first reported through
# +parse_error+ as reserved.
#
# target:: the PI target
# pi::     the PI content
# a::      extra arguments; accepted but not forwarded to the visitor
def on_pi(target, pi, *a)
  reserved = target.downcase == 'xml'
  parse_error "reserved PI target `#{target}'" if reserved
  @visitor.on_pi(target, pi)
end
on_prolog_space(s, *a) click to toggle source
   # File lib/xmlscan/parser.rb
# Deliberate no-op: the text passed in is dropped and nothing is reported
# to the visitor.
#
# s:: the text to ignore
# a:: extra arguments; ignored as well
#
# Always returns nil.
def on_prolog_space(s, *a)
  nil
end
on_stag(name, *a) click to toggle source

# Forwards +code+ unchanged to +on_attr_charref+ and returns its result.
# NOTE(review): judging by the names, this makes hexadecimal character
# references in attribute values take the same path as decimal ones --
# confirm against the scanner that calls it.
def on_attr_charref_hex(code)
  on_attr_charref(code)
end

    # File lib/xmlscan/parser.rb
# Handles the opening of a start tag.
#
# The element name is pushed onto @elem (the stack of open elements), the
# visitor's +on_stag+ is called with the name, and @attr is cleared so the
# duplicate-attribute check starts fresh for this tag.
#
# name:: the element name
# a::    extra arguments; accepted but not forwarded to the visitor
def on_stag(name, *a)
  @elem << name
  @visitor.on_stag(name)
  @attr.clear
end
on_stag_end(name, *a) click to toggle source
    # File lib/xmlscan/parser.rb
# Passes the end of a start tag straight through to the visitor's
# +on_stag_end+, including any extra arguments, and returns its result.
#
# name:: the element name
# a::    extra arguments, forwarded to the visitor
def on_stag_end(name, *a)
  @visitor.on_stag_end(name, *a)
end
on_stag_end_empty(name, *a) click to toggle source
    # File lib/xmlscan/parser.rb
# Handles the end of an empty-element tag.
#
# The visitor is notified through its dedicated +on_stag_end_empty+ callback
# (not as +on_stag_end+ followed by +on_etag+), and the element is then
# removed from the stack of open elements.
#
# name:: the element name
# a::    extra arguments, forwarded to the visitor
#
# Returns the entry popped from @elem.
def on_stag_end_empty(name, *a)
  @visitor.on_stag_end_empty(name, *a)
  @elem.pop
end
on_xmldecl_standalone(str, *a) click to toggle source
   # File lib/xmlscan/parser.rb
# Records the standalone declaration of the XML declaration and forwards it
# to the visitor's +on_xmldecl_standalone+.
#
# 'yes' sets @standalone to true and 'no' sets it to false. Any other value
# is reported through +parse_error+ and leaves @standalone untouched.
#
# str:: the value of the standalone pseudo-attribute
# a::   extra arguments; accepted but not forwarded to the visitor
def on_xmldecl_standalone(str, *a)
  case str
  when 'yes' then @standalone = true
  when 'no'  then @standalone = false
  else
    parse_error "standalone declaration must be either `yes' or `no'"
  end
  @visitor.on_xmldecl_standalone(str)
end
on_xmldecl_version(str, *a) click to toggle source
   # File lib/xmlscan/parser.rb
# Forwards the version given in the XML declaration to the visitor's
# +on_xmldecl_version+.
#
# Any version other than '1.0' is first reported through +warning+; the
# value is passed on to the visitor either way.
#
# str:: the value of the version pseudo-attribute
# a::   extra arguments; accepted but not forwarded to the visitor
def on_xmldecl_version(str, *a)
  warning "unsupported XML version `#{str}'" if str != '1.0'
  @visitor.on_xmldecl_version(str)
end