class WirisPlugin::SAXParser
Constants
- ATTRIB
- BEGIN_NODE
- BODY
- CDATA
- CHAR_AMPERSAND
- CHAR_AT
- CHAR_BACKSLASH
- CHAR_BAR
- CHAR_CARRIAGE_RETURN
- CHAR_CLOSE_BRACKET
- CHAR_COLON
- CHAR_DOT
- CHAR_DOUBLE_QUOT
- CHAR_EXCLAMATION
- CHAR_GREATER_THAN
- CHAR_HASH
- CHAR_HYPHEN
- CHAR_INTERROGATION
- CHAR_LESS_THAN
- CHAR_LINE_FEED
- CHAR_OPEN_BRACKET
- CHAR_OPEN_SQUARE_BRACKET
- CHAR_QUOT
- CHAR_SEMICOLON
- CHAR_SPACE
- CHAR_TAB
- CHAR_UNDERSCORE
- CHAR_X
- CHILDS
- COMMENT
- HEADER
- IGNORE_SPACES
- MALFORMED_XML
- TAG_NAME
- TAG_NAME_CLOSE
Attributes
columnNumber[RW]
current[RW]
entityResolvers[RW]
index[RW]
iterator[RW]
last[RW]
lineNumber[RW]
xml[RW]
Public Class Methods
formatLineEnds(data)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 435
# Normalizes line endings in +data+.
#
# Every carriage return is written out as a line feed, and a line feed that
# immediately follows a carriage return is dropped, so a CR/LF pair yields a
# single line feed.
#
# @param data [String] text to normalize
# @return [String] +data+ itself when it is empty, otherwise the normalized copy
def self.formatLineEnds(data)
  return data if data == ""

  normalized = StringBuf.new
  chars = Utf8.getIterator(data)
  after_cr = false
  while chars.hasNext
    code = chars.next
    if code == CHAR_CARRIAGE_RETURN
      after_cr = true
      normalized.addChar(CHAR_LINE_FEED)
      next
    end
    # The line feed for this break was already emitted when the CR was seen.
    swallow = after_cr && code == CHAR_LINE_FEED
    after_cr = false
    normalized.addChar(code) unless swallow
  end
  normalized.toString
end
isValidCharacter(c)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 311
# Tells whether +c+ is accepted inside a name by this parser: a letter or a
# digit (as decided by WCharacterBase), or one of the underscore, hyphen, dot
# and colon character codes.
#
# @param c [Integer] character code to test
# @return [Boolean] truthy when the character is accepted
def self.isValidCharacter(c)
  alphanumeric = WCharacterBase.isLetter(c) || WCharacterBase.isDigit(c)
  return alphanumeric if alphanumeric

  c == CHAR_UNDERSCORE || c == CHAR_HYPHEN || c == CHAR_DOT || c == CHAR_COLON
end
isValidInitCharacter(c)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 308
# Tells whether +c+ is accepted as the first character of a name by this
# parser: a letter (as decided by WCharacterBase), an underscore or a colon.
#
# @param c [Integer] character code to test
# @return [Boolean] truthy when the character is accepted
def self.isValidInitCharacter(c)
  letter = WCharacterBase.isLetter(c)
  return letter if letter

  c == CHAR_UNDERSCORE || c == CHAR_COLON
end
new()
click to toggle source
Calls superclass method
# File lib/com/wiris/util/xml/SAXParser.rb, line 51
# Builds a parser with no position information yet (line and column are both
# -1) and an empty list of entity resolvers.
def initialize
  super()
  @lineNumber = @columnNumber = -1
  @entityResolvers = Array.new
end
Public Instance Methods
addEntityResolver(e)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 63
# Appends +e+ to the list of entity resolvers.
#
# @param e [Object] resolver to register; parseEntities calls its
#   +resolveEntity+ method for entity names it does not handle itself
# @return [Array] the resolver list after the append
def addEntityResolver(e)
  @entityResolvers.push(e)
end
currentIsBlank()
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 460
# Tells whether the current character is one of the four blank codes this
# parser recognizes: space, line feed, carriage return or tab.
#
# @return [Boolean]
def currentIsBlank
  code = self.current
  code == CHAR_SPACE || code == CHAR_LINE_FEED || code == CHAR_CARRIAGE_RETURN || code == CHAR_TAB
end
getColumnNumber()
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 60
# Reads the parser's current column counter.
#
# @return [Integer] the value of @columnNumber (-1 on a freshly built parser)
def getColumnNumber
  @columnNumber
end
getLineNumber()
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 57
# Reads the parser's current line counter.
#
# @return [Integer] the value of @lineNumber (-1 on a freshly built parser)
def getLineNumber
  @lineNumber
end
ignoreSpaces()
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 463
# Advances the input until the current character is no longer blank.
# Does nothing when the current character is already non-blank.
def ignoreSpaces
  nextChar while currentIsBlank
end
nextChar()
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 387
# Moves to the next character of the input.
#
# When the iterator is exhausted, +current+ becomes -1 and nothing else
# changes. Otherwise the previous character is remembered in +last+,
# +current+ receives the next character code, +index+ grows by the length of
# that character's string form, and the line/column counters are updated.
def nextChar
  return (self.current = -1) unless self.iterator.hasNext

  self.last = self.current
  self.current = self.iterator.next
  self.index += Utf8.uchr(self.current).length

  # A new line starts after a line feed, or after a carriage return that is
  # not the first half of a CR/LF pair.
  line_break = self.last == CHAR_LINE_FEED ||
               (self.last == CHAR_CARRIAGE_RETURN && self.current != CHAR_LINE_FEED)
  if line_break
    self.columnNumber = 1
    self.lineNumber += 1
  else
    self.columnNumber += 1
  end
end
parse(source, c)
click to toggle source
# Parses the XML text +source+ and reports what it finds to the handler +c+.
#
# Parameters:
#   source - the XML text; when it is non-empty the line counter starts at 1
#            and the column counter at 0.
#   c      - handler object; the code below calls startDocument, endDocument,
#            startElement("", "", name, attributes), endElement("", "", name)
#            and characters(text) on it.
#
# Raises Exception with MALFORMED_XML on unexpected input and when element
# names are still open at the end of the input, and with a specific message
# for a mismatched closing tag ("Expected </...>"), an unterminated comment
# ("Comment not closed.") and a repeated attribute name
# ("Attribute ... already used in this tag.").
#
# The method is a character-driven state machine. `state` is the current
# state; `nextState` is where IGNORE_SPACES and COMMENT hand control back to.
# `names` is the stack of open element names, `attribs` collects the
# attributes of the tag being read, and `characters` buffers text until the
# next tag starts.
#
#   IGNORE_SPACES  - skips blanks, then switches to nextState.
#   BEGIN          - requires CHAR_LESS_THAN and switches to BEGIN_NODE.
#   BEGIN_NODE     - CHAR_EXCLAMATION introduces a comment (two hyphens) or a
#                    CDATA section (open square bracket + "CDATA["). Anything
#                    else first flushes the buffered text through
#                    c.characters (the column counter is lowered by 2 for the
#                    duration of that call), then dispatches on the current
#                    character: CHAR_INTERROGATION -> HEADER,
#                    CHAR_BAR -> TAG_NAME_CLOSE, valid initial name
#                    character -> TAG_NAME.
#   TAG_NAME       - reads a name and pushes it on `names`; a blank leads to
#                    BODY (via IGNORE_SPACES), CHAR_BAR leads to BODY,
#                    CHAR_GREATER_THAN reports startElement with empty
#                    attributes and switches to CHILDS.
#   TAG_NAME_CLOSE - reads a name, skips blanks, requires CHAR_GREATER_THAN
#                    and a name equal to the top of `names`; pops it and
#                    reports endElement.
#   BODY           - CHAR_BAR followed by CHAR_GREATER_THAN reports
#                    startElement and endElement for the popped name;
#                    CHAR_GREATER_THAN alone reports startElement; a valid
#                    initial name character switches to ATTRIB.
#   ATTRIB         - reads up to "=", then a value delimited by double or
#                    single quotes; the value goes through formatLineEnds and
#                    parseEntities before being added to `attribs`.
#   CHILDS         - reads text up to the next "<"; the text goes through
#                    parseEntities and formatLineEnds into `characters`.
#   CDATA          - reads up to "]]>"; the text goes through formatLineEnds
#                    only (no entity decoding) into `characters`.
#   COMMENT/HEADER - skip up to "-->" / "?>".
#
# After the loop, any text still buffered is reported through c.characters
# and c.endDocument is called.
#
# NOTE(review): CHAR_BAR is presumably the "/" character, given where it is
#   expected — confirm against the constant's definition.
# NOTE(review): BEGIN is used here as a state name, but BEGIN is a reserved
#   word in Ruby and does not appear in the Constants list of this page —
#   confirm the identifier used in the real source file.
# NOTE(review): when a comment closes, state becomes nextState with no
#   IGNORE_SPACES step; when nextState is BEGIN, a blank right after the
#   comment appears to raise MALFORMED_XML — confirm whether that is intended.
# NOTE(review): the "Expected </...>" message is built from the closing tag
#   that was read, not from the open name on the stack — confirm.
# NOTE(review): this listing is wrapped in the middle of expressions (inside
#   "(tagName == name)" and before an "else"); the breaks look like an
#   artifact of the page extraction rather than of the source file.
# File lib/com/wiris/util/xml/SAXParser.rb, line 66 def parse(source, c) if source::length() > 0 @lineNumber = 1 @columnNumber = 0 end self.xml = source @iterator = Utf8::getIterator(self.xml) c::startDocument() state = IGNORE_SPACES nextState = BEGIN names = Array.new() attribs = Attributes.new() self.index = 0 lastIndex = 0 characters = StringBuf.new() nextChar() while self.current != -1 if state == BEGIN if self.current == CHAR_LESS_THAN state = BEGIN_NODE nextState = BEGIN else raise Exception,MALFORMED_XML end else if state == BEGIN_NODE if self.current == CHAR_EXCLAMATION nextChar() if self.current == CHAR_HYPHEN nextChar() if self.current == CHAR_HYPHEN state = COMMENT next else raise Exception,MALFORMED_XML end else if self.current == CHAR_OPEN_SQUARE_BRACKET if self.searchString("CDATA[") state = CDATA else raise Exception,MALFORMED_XML end else raise Exception,MALFORMED_XML end end else ch = characters::toString() if !(ch == "") @columnNumber -= 2 c::characters(ch) @columnNumber += 2 end characters = StringBuf.new() if self.current == CHAR_INTERROGATION state = HEADER else if self.current == CHAR_BAR state = TAG_NAME_CLOSE else if SAXParser.isValidInitCharacter(self.current) state = TAG_NAME next else raise Exception,MALFORMED_XML end end end end else if state == TAG_NAME sb = StringBuf.new() while SAXParser.isValidCharacter(self.current) sb::addChar(self.current) nextChar() end tagName = sb::toString() names::push(tagName) if self.currentIsBlank() state = IGNORE_SPACES nextState = BODY else if self.current == CHAR_BAR state = BODY next else if self.current == CHAR_GREATER_THAN c::startElement("","",tagName,Attributes.new()) state = CHILDS else raise Exception,MALFORMED_XML end end end else if state == TAG_NAME_CLOSE sb = StringBuf.new() while SAXParser.isValidCharacter(self.current) sb::addChar(self.current) nextChar() end tagName = sb::toString() self.ignoreSpaces() name = names::_(names::length() - 1) if (self.current == CHAR_GREATER_THAN) && (tagName 
== name) names::pop() c::endElement("","",tagName) state = CHILDS else raise Exception,("Expected </" + tagName) + ">" end else if state == IGNORE_SPACES if !self.currentIsBlank() state = nextState next end else if state == COMMENT if self.searchString("-->") state = nextState else raise Exception,"Comment not closed." end else if state == BODY if self.current == CHAR_BAR nextChar() if self.current == CHAR_GREATER_THAN tagName = names::pop() c::startElement("","",tagName,attribs) attribs = Attributes.new() c::endElement("","",tagName) state = CHILDS else raise Exception,MALFORMED_XML end else if self.current == CHAR_GREATER_THAN c::startElement("","",names::_(names::length() - 1),attribs) attribs = Attributes.new() state = CHILDS else if SAXParser.isValidInitCharacter(self.current) state = ATTRIB next else raise Exception,MALFORMED_XML end end end else if state == HEADER if self.searchString("?>") state = IGNORE_SPACES nextState = BEGIN else raise Exception,MALFORMED_XML end else if state == ATTRIB if self.searchString("=") attribName = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1) nextChar() ignoreSpaces() lastIndex = self.index if self.current == CHAR_DOUBLE_QUOT nextChar() if !self.searchString("\"") raise Exception,MALFORMED_XML end else if self.current == CHAR_QUOT nextChar() if !self.searchString("\'") raise Exception,MALFORMED_XML end else raise Exception,MALFORMED_XML end end value = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1) value = SAXParser.formatLineEnds(value) if attribs::getValueFromName(attribName) != nil raise Exception,("Attribute " + attribName) + " already used in this tag." 
else attribs::add(attribName,parseEntities(value)) end else raise Exception,MALFORMED_XML end state = IGNORE_SPACES nextState = BODY else if state == CHILDS if self.searchString("<") pcdata = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1) parsedPCData = parseEntities(pcdata) parsedPCData = SAXParser.formatLineEnds(parsedPCData) characters::add(parsedPCData) state = BEGIN_NODE nextState = CHILDS else if self.current != -1 raise Exception,MALFORMED_XML end end else if state == CDATA if self.searchString("]]>") cdata = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 3) cdata = SAXParser.formatLineEnds(cdata) characters::add(cdata) state = CHILDS nextState = BEGIN else raise Exception,MALFORMED_XML end end end end end end end end end end end end lastIndex = self.index nextChar() end remainder = characters::toString() if !(remainder == "") c::characters(remainder) end if names::length() > 0 raise Exception,MALFORMED_XML end c::endDocument() end
parseEntities(pcdata)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 314
# Replaces the entity references in +pcdata+ with the text they stand for.
#
# Handled references:
# * the five predefined names quot, lt, gt, apos and amp;
# * numeric references: "#" followed by a decimal number, or "#x" followed
#   by a hexadecimal number;
# * any other name is offered to each registered entity resolver in turn
#   (a resolver answers -1 when it does not know the name); a name nobody
#   resolves is copied through unchanged as "&name;".
#
# An "&" that is never closed by a ";" is copied through literally together
# with the text after it. Previously everything from such an "&" to the end
# of the string was silently dropped.
#
# @param pcdata [String, nil] raw text
# @return [String] the decoded text; "" when +pcdata+ is nil or empty
# @raise [Exception] "Invalid numeric entity." when a numeric reference
#   evaluates to 0
def parseEntities(pcdata)
  return "" if pcdata.nil? || pcdata == ""

  amp = pcdata.indexOf("&")
  return pcdata if amp == -1

  semi = pcdata.indexOf(";", amp)
  parsed = StringBuf.new
  parsed.add(Std.substr(pcdata, 0, amp))

  while semi != -1 && amp < pcdata.length && semi < pcdata.length
    # Step over the "&" and the ";" that delimit the entity name.
    amp += 1
    entity = Std.substr(pcdata, amp, semi - amp)
    semi += 1

    case entity
    when "quot" then parsed.add("\"")
    when "lt"   then parsed.add("<")
    when "gt"   then parsed.add(">")
    when "apos" then parsed.add("'")
    when "amp"  then parsed.add("&")
    else
      if Std.charCodeAt(entity, 0) == CHAR_HASH
        code_point =
          if Std.charCodeAt(entity, 1) == CHAR_X
            Std.parseInt("0x" + Std.substr(entity, 2))
          else
            Std.parseInt(Std.substr(entity, 1))
          end
        raise Exception, "Invalid numeric entity." if code_point == 0

        parsed.add(Utf8.uchr(code_point))
      else
        resolved = -1
        position = 0
        while position < @entityResolvers.length && resolved == -1
          resolved = @entityResolvers._(position).resolveEntity(entity)
          position += 1
        end
        if resolved != -1
          parsed.add(Utf8.uchr(resolved))
        else
          parsed.add("&" + entity + ";")
        end
      end
    end

    # Copy the plain text up to the next "&", or to the end of the string.
    amp = pcdata.indexOf("&", semi)
    if amp == -1
      parsed.add(Std.substr(pcdata, semi))
      semi = pcdata.length
    else
      parsed.add(Std.substr(pcdata, semi, amp - semi))
      semi = pcdata.indexOf(";", amp)
    end
  end

  # semi is -1 only when the "&" at position amp has no closing ";":
  # keep that tail verbatim instead of losing it.
  parsed.add(Std.substr(pcdata, amp)) if semi == -1

  parsed.toString
end
searchString(search)
click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 402
# Advances the input, starting at the current character, until the text
# +search+ has just been read.
#
# On success the current character is the last character of the match. On
# failure the input has been consumed to its end (current is -1).
#
# Matching uses the Knuth-Morris-Pratt failure table, so after a partial
# match is broken the scan resumes with the longest prefix of +search+ that
# really is a suffix of what was read. The earlier hand-rolled fallback could
# overestimate that length and report a match that is not in the input
# (for instance "CDATAAA[" was accepted as containing "CDATA[").
#
# @param search [String] text to look for
# @return [Boolean] true when +search+ is empty or was found, false otherwise
def searchString(search)
  n = search.length
  return true if n == 0

  codes = (0...n).map { |pos| Utf8.charCodeAt(search, pos) }

  # fallback[q] = length of the longest proper prefix of codes[0..q] that is
  # also a suffix of it.
  fallback = Array.new(n, 0)
  prefix = 0
  (1...n).each do |q|
    prefix = fallback[prefix - 1] while prefix > 0 && codes[q] != codes[prefix]
    prefix += 1 if codes[q] == codes[prefix]
    fallback[q] = prefix
  end

  matched = 0
  while self.current != -1
    matched = fallback[matched - 1] while matched > 0 && self.current != codes[matched]
    if self.current == codes[matched]
      matched += 1
      return true if matched == n
    end
    nextChar
  end
  false
end