class WirisPlugin::SAXParser

Constants

ATTRIB
BEGIN_NODE
BODY
CDATA
CHAR_AMPERSAND
CHAR_AT
CHAR_BACKSLASH
CHAR_BAR
CHAR_CARRIAGE_RETURN
CHAR_CLOSE_BRACKET
CHAR_COLON
CHAR_DOT
CHAR_DOUBLE_QUOT
CHAR_EXCLAMATION
CHAR_GREATER_THAN
CHAR_HASH
CHAR_HYPHEN
CHAR_INTERROGATION
CHAR_LESS_THAN
CHAR_LINE_FEED
CHAR_OPEN_BRACKET
CHAR_OPEN_SQUARE_BRACKET
CHAR_QUOT
CHAR_SEMICOLON
CHAR_SPACE
CHAR_TAB
CHAR_UNDERSCORE
CHAR_X
CHILDS
COMMENT
IGNORE_SPACES
MALFORMED_XML
TAG_NAME
TAG_NAME_CLOSE

Attributes

columnNumber[RW]
current[RW]
entityResolvers[RW]
index[RW]
iterator[RW]
last[RW]
lineNumber[RW]
xml[RW]

Public Class Methods

formatLineEnds(data) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 435
# Normalizes line endings in +data+ to a single line feed.
#
# Every carriage return is emitted as a line feed, and a line feed that comes
# directly after a carriage return is dropped, so both a lone CR and a CR+LF
# pair become one LF. All other characters are copied unchanged.
#
# data:: the string to normalize; the empty string is returned as-is.
# Returns the normalized string.
def self.formatLineEnds(data)
    return data if data == ""
    out = StringBuf.new()
    chars = Utf8::getIterator(data)
    afterCarriageReturn = false
    while chars::hasNext()
        code = chars::next()
        if code == CHAR_CARRIAGE_RETURN
            # The CR itself already produces the line feed.
            afterCarriageReturn = true
            out::addChar(CHAR_LINE_FEED)
            next
        end
        # An LF right after a CR is the second half of a CR+LF pair: skip it.
        pairedLineFeed = afterCarriageReturn && (code == CHAR_LINE_FEED)
        afterCarriageReturn = false
        out::addChar(code) unless pairedLineFeed
    end
    return out::toString()
end
isValidCharacter(c) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 311
# Tells whether the code point +c+ may appear inside a tag name as read by
# this parser: a letter, a digit, or one of CHAR_UNDERSCORE, CHAR_HYPHEN,
# CHAR_DOT, CHAR_COLON.
def self.isValidCharacter(c)
    alphanumeric = WCharacterBase::isLetter(c) || WCharacterBase::isDigit(c)
    return alphanumeric ||
        (c == CHAR_UNDERSCORE) ||
        (c == CHAR_HYPHEN) ||
        (c == CHAR_DOT) ||
        (c == CHAR_COLON)
end
isValidInitCharacter(c) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 308
# Tells whether the code point +c+ may start a tag or attribute name as read
# by this parser: a letter, CHAR_UNDERSCORE or CHAR_COLON.
def self.isValidInitCharacter(c)
    letter = WCharacterBase::isLetter(c)
    return letter || (c == CHAR_UNDERSCORE) || (c == CHAR_COLON)
end
new() click to toggle source
Calls superclass method
# File lib/com/wiris/util/xml/SAXParser.rb, line 51
# Creates a parser with no position information and no entity resolvers.
#
# Line and column both start at -1; #parse replaces them once it is given a
# non-empty source.
def initialize()
    super()
    @lineNumber = @columnNumber = -1
    @entityResolvers = Array.new()
end

Public Instance Methods

addEntityResolver(e) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 63
# Registers +e+ as an additional entity resolver, after those already added.
# #parseEntities calls resolveEntity on each resolver in registration order.
def addEntityResolver(e)
    @entityResolvers.push(e)
end
currentIsBlank() click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 460
# Tells whether the current character is whitespace for this parser:
# CHAR_SPACE, CHAR_LINE_FEED, CHAR_CARRIAGE_RETURN or CHAR_TAB.
def currentIsBlank()
    code = self.current
    return (code == CHAR_SPACE) ||
        (code == CHAR_LINE_FEED) ||
        (code == CHAR_CARRIAGE_RETURN) ||
        (code == CHAR_TAB)
end
getColumnNumber() click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 60
# Returns the parser's current column number (the @columnNumber field).
def getColumnNumber()
    @columnNumber
end
getLineNumber() click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 57
# Returns the parser's current line number (the @lineNumber field).
def getLineNumber()
    @lineNumber
end
ignoreSpaces() click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 463
# Advances past whitespace: keeps calling #nextChar for as long as
# #currentIsBlank reports the current character as blank.
def ignoreSpaces()
    nextChar() while self.currentIsBlank()
end
nextChar() click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 387
# Moves to the next character of the input and updates the position counters.
#
# When the iterator is exhausted only +current+ changes: it becomes -1.
# Otherwise +last+ takes the previous +current+, +current+ takes the next code
# point, and +index+ grows by the length of that code point's string form.
# A new line begins after a line feed, or after a carriage return that is not
# followed by a line feed (so a CR+LF pair counts as a single line break);
# in that case the column restarts at 1 and the line number is incremented,
# otherwise only the column is incremented.
def nextChar()
    unless self.iterator::hasNext()
        return self.current = -1
    end
    self.last = self.current
    self.current = self.iterator::next()
    self.index += Utf8::uchr(self.current)::length()
    afterLineFeed = (self.last == CHAR_LINE_FEED)
    startsNewLine = afterLineFeed || ((self.last == CHAR_CARRIAGE_RETURN) && (self.current != CHAR_LINE_FEED))
    if !startsNewLine
        self.columnNumber += 1
    else
        self.columnNumber = 1
        self.lineNumber += 1
    end
end
parse(source, c) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 66
# Parses the XML text +source+ and reports what it finds to the handler +c+.
#
# The handler receives startDocument, startElement("", "", name, attributes),
# characters(text), endElement("", "", name) and endDocument calls. Parsing is
# a character-driven state machine: +state+ is the state being executed and
# +nextState+ is where IGNORE_SPACES and COMMENT hand control back to.
# Branches that end with +next+ switch state without consuming the current
# character; every other branch falls through to the bottom of the loop, which
# records +lastIndex+ and advances one character.
#
# source:: the XML text; must respond to length().
# c::      the content handler object.
# Raises Exception with MALFORMED_XML, "Comment not closed.",
# "Expected </name>" or "Attribute name already used in this tag." on invalid
# input, and MALFORMED_XML when elements are still open at end of input.
#
# NOTE(review): BEGIN and HEADER are used below but do not appear in the
# constant list visible with this class, and a bare BEGIN is a reserved word
# in Ruby - confirm how these two states are defined in the real source.
def parse(source, c)
    # Position tracking only starts for non-empty input; otherwise the -1
    # values set by the constructor are kept.
    if source::length() > 0
        @lineNumber = 1
        @columnNumber = 0
    end
    self.xml = source
    @iterator = Utf8::getIterator(self.xml)
    c::startDocument()
    state = IGNORE_SPACES
    nextState = BEGIN
    # Stack of the names of the elements that are currently open.
    names = Array.new()
    # Attributes collected so far for the tag being read.
    attribs = Attributes.new()
    self.index = 0
    lastIndex = 0
    # Text accumulated from character data and CDATA sections, flushed to the
    # handler when the next tag starts or the input ends.
    characters = StringBuf.new()
    # Load the first character into +current+.
    nextChar()
    while self.current != -1
        # BEGIN: only a CHAR_LESS_THAN is acceptable here.
        if state == BEGIN
            if self.current == CHAR_LESS_THAN
                state = BEGIN_NODE
                nextState = BEGIN
            else 
                raise Exception,MALFORMED_XML
            end
        else 
            # BEGIN_NODE: first character after the opening CHAR_LESS_THAN.
            if state == BEGIN_NODE
                if self.current == CHAR_EXCLAMATION
                    # Either a comment (two hyphens follow) or a CDATA section.
                    nextChar()
                    if self.current == CHAR_HYPHEN
                        nextChar()
                        if self.current == CHAR_HYPHEN
                            state = COMMENT
                                next
                        else 
                            raise Exception,MALFORMED_XML
                        end
                    else 
                        if self.current == CHAR_OPEN_SQUARE_BRACKET
                            if self.searchString("CDATA[")
                                state = CDATA
                            else 
                                raise Exception,MALFORMED_XML
                            end
                        else 
                            raise Exception,MALFORMED_XML
                        end
                    end
                else 
                    # A tag or header starts: deliver any buffered text first.
                    ch = characters::toString()
                    if !(ch == "")
                        # The column is moved back by 2 only for the duration
                        # of the callback - presumably so the handler sees the
                        # position before the tag opener; TODO confirm.
                        @columnNumber -= 2
                        c::characters(ch)
                        @columnNumber += 2
                    end
                    characters = StringBuf.new()
                    if self.current == CHAR_INTERROGATION
                        state = HEADER
                    else 
                        # CHAR_BAR marks a closing tag here (presumably "/").
                        if self.current == CHAR_BAR
                            state = TAG_NAME_CLOSE
                        else 
                            if SAXParser.isValidInitCharacter(self.current)
                                state = TAG_NAME
                                    next
                            else 
                                raise Exception,MALFORMED_XML
                            end
                        end
                    end
                end
            else 
                # TAG_NAME: read the element name and push it on the stack.
                if state == TAG_NAME
                    sb = StringBuf.new()
                    while SAXParser.isValidCharacter(self.current)
                        sb::addChar(self.current)
                        nextChar()
                    end
                    tagName = sb::toString()
                    names::push(tagName)
                    if self.currentIsBlank()
                        state = IGNORE_SPACES
                        nextState = BODY
                    else 
                        if self.current == CHAR_BAR
                            state = BODY
                                next
                        else 
                            if self.current == CHAR_GREATER_THAN
                                # Tag closed right after its name: no attributes.
                                c::startElement("","",tagName,Attributes.new())
                                state = CHILDS
                            else 
                                raise Exception,MALFORMED_XML
                            end
                        end
                    end
                else 
                    # TAG_NAME_CLOSE: read a closing tag name and match it
                    # against the innermost open element.
                    if state == TAG_NAME_CLOSE
                        sb = StringBuf.new()
                        while SAXParser.isValidCharacter(self.current)
                            sb::addChar(self.current)
                            nextChar()
                        end
                        tagName = sb::toString()
                        self.ignoreSpaces()
                        name = names::_(names::length() - 1)
                        if (self.current == CHAR_GREATER_THAN) && (tagName == name)
                            names::pop()
                            c::endElement("","",tagName)
                            state = CHILDS
                        else 
                            # NOTE(review): the message prints the tag that was
                            # found (tagName), not the one that was open (name)
                            # - confirm whether that is intended.
                            raise Exception,("Expected </" + tagName) + ">"
                        end
                    else 
                        # IGNORE_SPACES: skip blanks, then resume in nextState
                        # on the first non-blank character.
                        if state == IGNORE_SPACES
                            if !self.currentIsBlank()
                                state = nextState
                                    next
                            end
                        else 
                            # COMMENT: skip to the end-of-comment marker.
                            if state == COMMENT
                                if self.searchString("-->")
                                    state = nextState
                                else 
                                    raise Exception,"Comment not closed."
                                end
                            else 
                                # BODY: inside a start tag, after its name.
                                if state == BODY
                                    if self.current == CHAR_BAR
                                        # Self-closing tag: report start and
                                        # end together and drop its name.
                                        nextChar()
                                        if self.current == CHAR_GREATER_THAN
                                            tagName = names::pop()
                                            c::startElement("","",tagName,attribs)
                                            attribs = Attributes.new()
                                            c::endElement("","",tagName)
                                            state = CHILDS
                                        else 
                                            raise Exception,MALFORMED_XML
                                        end
                                    else 
                                        if self.current == CHAR_GREATER_THAN
                                            # End of the start tag: report it
                                            # with the attributes collected.
                                            c::startElement("","",names::_(names::length() - 1),attribs)
                                            attribs = Attributes.new()
                                            state = CHILDS
                                        else 
                                            if SAXParser.isValidInitCharacter(self.current)
                                                state = ATTRIB
                                                    next
                                            else 
                                                raise Exception,MALFORMED_XML
                                            end
                                        end
                                    end
                                else 
                                    # HEADER: skip a "<? ... ?>" block.
                                    if state == HEADER
                                        if self.searchString("?>")
                                            state = IGNORE_SPACES
                                            nextState = BEGIN
                                        else 
                                            raise Exception,MALFORMED_XML
                                        end
                                    else 
                                        # ATTRIB: read one name="value" or
                                        # name='value' pair.
                                        if state == ATTRIB
                                            if self.searchString("=")
                                                # Text between lastIndex and the "=" just found.
                                                attribName = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1)
                                                nextChar()
                                                ignoreSpaces()
                                                lastIndex = self.index
                                                # The value ends at the next quote of the same kind.
                                                if self.current == CHAR_DOUBLE_QUOT
                                                    nextChar()
                                                    if !self.searchString("\"")
                                                        raise Exception,MALFORMED_XML
                                                    end
                                                else 
                                                    if self.current == CHAR_QUOT
                                                        nextChar()
                                                        if !self.searchString("\'")
                                                            raise Exception,MALFORMED_XML
                                                        end
                                                    else 
                                                        raise Exception,MALFORMED_XML
                                                    end
                                                end
                                                value = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1)
                                                value = SAXParser.formatLineEnds(value)
                                                # The same attribute name may not appear twice in one tag.
                                                if attribs::getValueFromName(attribName) != nil
                                                    raise Exception,("Attribute " + attribName) + " already used in this tag."
                                                else 
                                                    attribs::add(attribName,parseEntities(value))
                                                end
                                            else 
                                                raise Exception,MALFORMED_XML
                                            end
                                            state = IGNORE_SPACES
                                            nextState = BODY
                                        else 
                                            # CHILDS: element content; collect
                                            # text up to the next "<".
                                            if state == CHILDS
                                                if self.searchString("<")
                                                    pcdata = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 1)
                                                    parsedPCData = parseEntities(pcdata)
                                                    parsedPCData = SAXParser.formatLineEnds(parsedPCData)
                                                    characters::add(parsedPCData)
                                                    state = BEGIN_NODE
                                                    nextState = CHILDS
                                                else 
                                                    if self.current != -1
                                                        raise Exception,MALFORMED_XML
                                                    end
                                                end
                                            else 
                                                # CDATA: copy everything up to
                                                # "]]>" without entity decoding.
                                                if state == CDATA
                                                    if self.searchString("]]>")
                                                        cdata = Std::substr(self.xml,lastIndex,(self.index - lastIndex) - 3)
                                                        cdata = SAXParser.formatLineEnds(cdata)
                                                        characters::add(cdata)
                                                        state = CHILDS
                                                        nextState = BEGIN
                                                    else 
                                                        raise Exception,MALFORMED_XML
                                                    end
                                                end
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
            end
        end
        # Remember where the next character starts, then consume one character.
        lastIndex = self.index
        nextChar()
    end
    # Deliver any text still buffered when the input ran out.
    remainder = characters::toString()
    if !(remainder == "")
        c::characters(remainder)
    end
    # Any name left on the stack is an element that was never closed.
    if names::length() > 0
        raise Exception,MALFORMED_XML
    end
    c::endDocument()
end
parseEntities(pcdata) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 314
# Expands the entity references found in +pcdata+.
#
# * The names quot, lt, gt, apos and amp become the character they stand for.
# * A reference whose first character is CHAR_HASH is numeric: the rest is
#   parsed with Std::parseInt, with a "0x" prefix when the second character
#   is CHAR_X, and replaced by that code point.
# * Any other name is offered to each registered entity resolver in turn; the
#   first result other than -1 is used as a code point. If no resolver knows
#   the name, the reference is copied through unchanged.
#
# An "&" that has no ";" after it is not a reference: it and the text that
# follows are copied through verbatim. (Previously that tail was silently
# dropped, so "a & b" came back as "a ".)
#
# pcdata:: the text to decode; nil is treated like the empty string.
# Returns the decoded string.
# Raises Exception ("Invalid numeric entity.") when a numeric reference does
# not parse to a non-zero number.
def parseEntities(pcdata)
    if (pcdata == nil) || (pcdata == "")
        return ""
    end
    in1 = pcdata::indexOf("&")
    if in1 == -1
        return pcdata
    end
    in2 = pcdata::indexOf(";",in1)
    parsed = StringBuf.new()
    parsed::add(Std::substr(pcdata,0,in1))
    # Invariant at the top of each pass: in1 is the position of an "&" and in2
    # the position of the first ";" after it.
    while ((in2 != -1) && (in1 < pcdata::length())) && (in2 < pcdata::length())
        in1+=1
        entity = Std::substr(pcdata,in1,in2 - in1)
        in2+=1
        case entity
        when "quot"
            parsed::add("\"")
        when "lt"
            parsed::add("<")
        when "gt"
            parsed::add(">")
        when "apos"
            parsed::add("\'")
        when "amp"
            parsed::add("&")
        else
            if Std::charCodeAt(entity,0) == CHAR_HASH
                utfvalue = 0
                if Std::charCodeAt(entity,1) == CHAR_X
                    value = Std::substr(entity,2)
                    utfvalue = Std::parseInt("0x" + value)
                else
                    value = Std::substr(entity,1)
                    utfvalue = Std::parseInt(value)
                end
                # Also reject a failed parse, so a bad reference is reported
                # as such instead of failing inside Utf8::uchr.
                if utfvalue.nil? || (utfvalue == 0)
                    raise Exception,"Invalid numeric entity."
                end
                parsed::add(Utf8::uchr(utfvalue))
            else
                # Ask the resolvers in registration order; -1 means "unknown".
                r = 0
                sol = -1
                while (r < @entityResolvers::length()) && (sol == -1)
                    sol = @entityResolvers::_(r)::resolveEntity(entity)
                    r+=1
                end
                if sol != -1
                    parsed::add(Utf8::uchr(sol))
                else
                    parsed::add(("&" + entity) + ";")
                end
            end
        end
        in1 = pcdata::indexOf('&',in2)
        if in1 == -1
            # No more references: copy the rest and make the loop stop.
            parsed::add(Std::substr(pcdata,in2))
            in2 = pcdata::length()
        else
            parsed::add(Std::substr(pcdata,in2,in1 - in2))
            in2 = pcdata::indexOf(';',in1)
        end
    end
    # The loop also stops when an "&" has no closing ";". Nothing from that
    # "&" onwards has been copied yet, so append it untouched.
    if (in1 != -1) && (in2 == -1)
        parsed::add(Std::substr(pcdata,in1))
    end
    return parsed::toString()
end
searchString(search) click to toggle source
# File lib/com/wiris/util/xml/SAXParser.rb, line 402
# Advances through the input until the text +search+ has just been read.
#
# Matching starts with the current character. On success the method returns
# true and leaves +current+ on the last character of the match; #nextChar is
# not called for it. If the input runs out first (+current+ becomes -1) the
# method returns false. An empty +search+ matches immediately without
# consuming anything.
#
# A prefix (failure) table is built for +search+ so that, after a mismatch,
# matching resumes from the longest prefix of +search+ that is still a suffix
# of what was read. The previous hand-written fallback was only right for
# patterns whose prefix is one repeated character and could report a match
# that is not in the input (for example "abc" in "abbc").
#
# search:: the text to look for; its characters are read with
#          Utf8::charCodeAt(search, k) for k in 0...search.length().
# Returns true if found, false if the end of the input was reached.
def searchString(search)
    n = search::length()
    if n == 0
        return true
    end
    codes = (0...n).map { |k| Utf8::charCodeAt(search,k) }
    # fallback[p] = length of the longest proper prefix of the pattern that is
    # also a suffix of its first p + 1 characters.
    fallback = [0] * n
    k = 0
    (1...n).each do |p|
        k = fallback[k - 1] while (k > 0) && (codes[p] != codes[k])
        k += 1 if codes[p] == codes[k]
        fallback[p] = k
    end
    i = 0
    while self.current != -1
        # Shrink the partial match until the current character can extend it.
        i = fallback[i - 1] while (i > 0) && (self.current != codes[i])
        if self.current == codes[i]
            i += 1
            if i == n
                return true
            end
        end
        nextChar()
    end
    return false
end