class XMLScan::XMLParser
Public Instance Methods
parse(*)
click to toggle source
# Maps each predefined entity name (written without the surrounding `&`
# and `;`) to the single character it stands for.  The table is frozen so
# that this shared constant cannot be modified by accident at run time.
PredefinedEntity = {
  'lt' => '<', 'gt' => '>', 'amp' => '&', 'quot' => '"', 'apos' => "'",
}.freeze
Calls superclass method
# File lib/xmlscan/parser.rb, line 36
#
# Resets the per-document state kept by this parser and then passes every
# argument, unchanged, to the superclass implementation of +parse+.
#
# Returns whatever the superclass method returns.
def parse(*)
  @elem = []                     # stack of currently open element names
  @attr = AttributeChecker.new   # detects repeated attribute names in a start tag
  @standalone = nil              # unknown until a standalone declaration is seen
  super
end
scan_content(s)
click to toggle source
# File lib/xmlscan/parser.rb, line 163
#
# Scans the document body token by token, starting with +s+ and pulling
# each following token from @src.get until it returns nil.
#
# Every pass of the outer loop works through three phases:
#
# 1. Before the first start tag: comments, processing instructions and
#    `<!` declarations are scanned; character data, CDATA sections and end
#    tags are still scanned, the first two after a parse error is reported.
# 2. Contents: tokens are dispatched until the element stack is empty
#    again.  Elements still open when the input ends are reported and
#    closed on the visitor.
# 3. Epilogue: only comments, processing instructions and blank text are
#    accepted.  Anything else sends the same token back to phase 1.
#
# Returns nil.
def scan_content(s)
  elem = @elem   # for speed
  src = @src     # for speed
  found_root_element = false

  # A plain begin/until is kept (rather than Kernel#loop) so that a
  # StopIteration raised by a callee is not silently swallowed.
  begin

    # -- first start tag --
    elem.clear
    found_stag = false

    while s && !found_stag
      if s[0] == ?<
        case s[1]
        when ?/
          # should be a parse error
          scan_etag s
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          elsif /\A<!\[CDATA\[/n =~ s
            parse_error "CDATA section is found outside of root element"
            scan_cdata $'
          else
            scan_bang_tag s
          end
        when ??
          scan_pi s
        else
          found_root_element = true
          found_stag = true
          scan_stag s
        end
      else
        parse_error "content of element is found outside of root element"
        scan_chardata s
      end
      s = src.get
    end

    unless found_root_element || found_stag
      parse_error "no root element was found"
    end

    # -- contents --
    while s && !elem.empty?
      if s[0] == ?<
        case s[1]
        when ?/
          scan_etag s
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          elsif /\A<!\[CDATA\[/n =~ s
            scan_cdata $'
          else
            scan_bang_tag s
          end
        when ??
          scan_pi s
        else
          scan_stag s
        end
      else
        scan_chardata s
      end
      s = src.get
    end

    # Input ended with elements still open: report each one and close it
    # on the visitor, innermost first.
    while (name = elem.pop)
      parse_error "unclosed element `#{name}' meets EOF"
      @visitor.on_etag name
    end

    # -- epilogue --
    finish = true

    while s
      if s[0] == ?<
        case s[1]
        when ?/
          finish = false   # content out of root element
          break
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          else
            finish = false   # content out of root element
            break
          end
        when ??
          scan_pi s
        else
          parse_error "another root element is found"   # stag
          finish = false
          break
        end
      elsif s.strip.empty?
        on_prolog_space s
      else
        finish = false   # content out of root element
        break
      end
      s = src.get
    end

  end until finish
end
Private Instance Methods
on_attr_value(str, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 130
#
# Normalizes an attribute value by turning every tab, carriage return and
# newline into a single space, then reports the result to the visitor.
#
# +str+ is normalized in place when it can be modified; a frozen string is
# normalized into a copy instead of raising FrozenError.  Extra arguments
# in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_attr_value returns.
def on_attr_value(str, *a)
  if str.frozen?
    str = str.tr("\t\r\n", ' ')   # cannot mutate: work on a copy
  else
    str.tr! "\t\r\n", ' '         # normalize
  end
  @visitor.on_attr_value str
end
on_attribute(name, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 123
#
# Reports an attribute name to the visitor.  A well-formedness error is
# raised through wellformed_error first when @attr.check_unique rejects
# the name; the name is forwarded to the visitor in either case.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_attribute returns.
def on_attribute(name, *a)
  wellformed_error "doubled attribute `#{name}'" unless @attr.check_unique(name)
  @visitor.on_attribute name
end
on_doctype(name, pubid, sysid, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 66
#
# Reports a document type declaration to the visitor.  A parse error is
# reported first when a public ID is given without a system ID; the
# declaration is forwarded to the visitor in either case.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_doctype returns.
def on_doctype(name, pubid, sysid, *a)
  if pubid && !sysid
    parse_error "public external ID must have both public ID and system ID"
  end
  @visitor.on_doctype name, pubid, sysid
end
on_etag(name, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 147
#
# Handles an end tag by popping the innermost open element from @elem.
#
# * When the popped name equals +name+, the end tag is forwarded to the
#   visitor together with any extra arguments.
# * When a different element was open, a well-formedness error is
#   reported and the visitor is told that the popped element ended.
# * When no element was open, a parse error is reported and the visitor
#   is not called.
def on_etag(name, *a)
  last = @elem.pop
  if last == name
    @visitor.on_etag name, *a
  elsif last
    wellformed_error "element type `#{name}' is not matched"
    @visitor.on_etag last, *a
  else
    parse_error "end tag `#{name}' appears alone"
  end
end
on_pi(target, pi, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 79
#
# Reports a processing instruction to the visitor.  A parse error is
# reported first when +target+ is `xml` in any letter case; the
# instruction is forwarded to the visitor in either case.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_pi returns.
def on_pi(target, pi, *a)
  parse_error "reserved PI target `#{target}'" if target.downcase == 'xml'
  @visitor.on_pi target, pi
end
on_prolog_space(s, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 74
#
# Deliberately ignores the text +s+ (and any extra arguments): nothing is
# passed on to the visitor.  Returns nil.
def on_prolog_space(s, *a)
  # just ignore it.
end
on_stag(name, *a)
click to toggle source
# Forwards +code+ unchanged to on_attr_charref and returns its result.
# NOTE(review): presumably the hook for hexadecimal character references
# inside attribute values - confirm against the scanner that calls it.
def on_attr_charref_hex(code)
  on_attr_charref(code)
end
# File lib/xmlscan/parser.rb, line 117
#
# Handles the opening of a start tag: pushes +name+ onto the stack of
# open elements, notifies the visitor, and clears @attr so that it starts
# empty for this tag.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @attr.clear returns.
def on_stag(name, *a)
  @elem.push name
  @visitor.on_stag name
  @attr.clear
end
on_stag_end(name, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 135
#
# Forwards the end of a start tag to the visitor, passing +name+ and any
# extra arguments through unchanged.
#
# Returns whatever @visitor.on_stag_end returns.
def on_stag_end(name, *a)
  @visitor.on_stag_end(name, *a)
end
on_stag_end_empty(name, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 139
#
# Handles the end of an empty-element tag: forwards +name+ and any extra
# arguments to the visitor as a single on_stag_end_empty event, then pops
# the element from the stack of open elements.
#
# Returns the value popped from @elem.
def on_stag_end_empty(name, *a)
  # Alternative left commented out: report the tag as a separate
  # start-tag end followed by an end tag.
  # @visitor.on_stag_end name
  # @elem.pop
  # @visitor.on_etag name
  @visitor.on_stag_end_empty(name, *a)
  @elem.pop
end
on_xmldecl_standalone(str, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 54
#
# Records the standalone declaration and reports it to the visitor.
#
# @standalone becomes true for 'yes' and false for 'no'.  Any other value
# leaves @standalone untouched and reports a parse error.  The raw +str+
# is forwarded to the visitor in every case.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_xmldecl_standalone returns.
def on_xmldecl_standalone(str, *a)
  case str
  when 'yes'
    @standalone = true
  when 'no'
    @standalone = false
  else
    parse_error "standalone declaration must be either `yes' or `no'"
  end
  @visitor.on_xmldecl_standalone str
end
on_xmldecl_version(str, *a)
click to toggle source
# File lib/xmlscan/parser.rb, line 46
#
# Reports the version from the XML declaration to the visitor.  A warning
# is issued first for any version other than '1.0'; the version string is
# forwarded to the visitor in either case.
# Extra arguments in +a+ are accepted but not forwarded.
#
# Returns whatever @visitor.on_xmldecl_version returns.
def on_xmldecl_version(str, *a)
  warning "unsupported XML version `#{str}'" unless str == '1.0'
  @visitor.on_xmldecl_version str
end