class Tokenizer
Attributes
Tags[RW]
html[RW]
type[RW]
Public Class Methods
new(html)
# File lib/rLexer/tokenizer.rb, line 6
def initialize(html)
  @html = html.gsub('"', '\'')
  @html.strip!
  @type = :EOF
  @tokens = []
end
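A minimal construction sketch, assuming the gem's lib directory is on the load path so that lib/rLexer/tokenizer.rb can be required directly (the require path below is an assumption; adjust it to your setup):

require 'rLexer/tokenizer'

html = '<p class="intro">Hello</p>'
tokenizer = Tokenizer.new(html)
# The constructor rewrites double quotes as single quotes and strips
# surrounding whitespace before storing the input.
tokenizer.html   # => "<p class='intro'>Hello</p>"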
Public Instance Methods
close_tag?(char)
# File lib/rLexer/tokenizer.rb, line 150
def close_tag?(char)
  char == Tags::CLOSE_TAG
end
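This predicate, like the others below, compares against constants from the library's Tags module, which is not reproduced on this page. Purely as a reading aid for the examples that follow, a plausible but unverified sketch of those constants:

# Assumed values only; the real rLexer Tags module may differ.
module Tags
  OPEN_TAG      = '<'
  CLOSE_TAG     = '>'
  CLOSING_TAG   = '</'
  START_COMMENT = '<!--'
  END_COMMENT   = '-->'
end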
comment_end(idx)
# File lib/rLexer/tokenizer.rb, line 115
def comment_end(idx)
  return if not @type == :COMMENT
  if comment_end?(idx)
    set_type(idx)
  end
end
comment_end?(idx)
# File lib/rLexer/tokenizer.rb, line 122
def comment_end?(idx)
  suitable?(idx, Tags::END_COMMENT)
end
comment_start?(idx)
# File lib/rLexer/tokenizer.rb, line 138
def comment_start?(idx)
  suitable?(idx, Tags::START_COMMENT)
end
consume(idx)
# File lib/rLexer/tokenizer.rb, line 28
def consume(idx)
  if @type == :COMMENT
    consume_comment(idx)
  elsif @type == :OPEN or @type == :CLOSE
    consume_tag(idx)
  elsif @type == :DOCTYPE
    #consume_doctype(idx)
  elsif @type == :DATA
    consume_data(idx)
  end
end
consume_attributes()
# File lib/rLexer/tokenizer.rb, line 77
def consume_attributes
  atts_new = []
  @tokens.each.with_index do |token, i|
    atts = token[1].split(' ')[1..-1]
    if token[0] == :OPEN and !atts[0].nil?
      atts_new.push([i, atts.join(' ').split("' ")])
    end
    @tokens[i][1] = @tokens[i][1].split(' ')[0] unless @tokens[i][0] == :COMMENT or @tokens[i][0] == :DATA
  end
  c = 1
  atts_new.each.with_index do |x|
    @tokens.insert(x[0] + c, [:ATTRIBUTES, x[1]])
    c += 1
  end
end
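consume_attributes runs once at the end of tokenize: for each :OPEN token whose value carries attributes, it trims that token down to the bare tag name and inserts an [:ATTRIBUTES, ...] token directly after it. A hedged inspection sketch (the exact attribute strings depend on the quote rewriting done in the constructor):

tokenizer = Tokenizer.new('<a href="/home" id="nav">Home</a>')
tokenizer.tokenize
tokenizer.tokens.each do |type, value|
  # Expect the :OPEN token to hold just the tag name ("a"), followed by an
  # :ATTRIBUTES token holding the raw attribute strings as an array.
  puts "#{type}: #{value.inspect}"
end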
consume_comment(idx)
# File lib/rLexer/tokenizer.rb, line 58
def consume_comment(idx)
  slice = @html[idx..-1]
  slice = slice[Tags::START_COMMENT.length..end_comment_index(slice)]
  set_token(slice)
end
consume_data(idx)
# File lib/rLexer/tokenizer.rb, line 93
def consume_data(idx)
  return if next_char?(idx)
  slice = @html[idx..-1]
  slice = slice[Tags::CLOSE_TAG.length..slice.index(Tags::OPEN_TAG) || slice.length]
  set_token(slice) unless slice == ''
end
consume_tag(idx)
# File lib/rLexer/tokenizer.rb, line 64
def consume_tag(idx)
  slice = @html[idx..-1]
  if slice.index(Tags::CLOSE_TAG).nil?
    index = -1
  else
    index = slice.index(Tags::CLOSE_TAG) - 1
  end
  slice = slice[tag_index(slice)..index]
  set_token(slice)
end
current_char(idx)
# File lib/rLexer/tokenizer.rb, line 102
def current_char(idx)
  @html[idx]
end
doctype?(idx)
# File lib/rLexer/tokenizer.rb, line 134
def doctype?(idx)
  false
end
end_comment_index(html)
# File lib/rLexer/tokenizer.rb, line 106
def end_comment_index(html)
  idx = html.index(Tags::END_COMMENT)
  (not idx.nil?) ? (idx + 2) - Tags::END_COMMENT.length : -1
end
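The (idx + 2) - Tags::END_COMMENT.length arithmetic returns the index of the last character before the comment terminator, or -1 when no terminator is found. A worked example, assuming END_COMMENT is the three-character string '-->':

html = ' a comment -->tail'
idx  = html.index('-->')      # => 11
(idx + 2) - '-->'.length      # => 10
html[10]                      # => " ", the last character before "-->"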
end_tag?(idx)
# File lib/rLexer/tokenizer.rb, line 130
def end_tag?(idx)
  suitable?(idx, Tags::CLOSING_TAG)
end
next_char?(idx)
# File lib/rLexer/tokenizer.rb, line 126
def next_char?(idx)
  @html[idx + 1] == Tags::OPEN_TAG or @html[idx + 1].nil?
end
open_tag?(char)
# File lib/rLexer/tokenizer.rb, line 146
def open_tag?(char)
  char == Tags::OPEN_TAG
end
process(idx)
# File lib/rLexer/tokenizer.rb, line 24
def process(idx)
  set_type(idx); consume(idx)
end
set_token(slice)
# File lib/rLexer/tokenizer.rb, line 54
def set_token(slice)
  @tokens.push([@type, slice])
end
set_type(idx)
# File lib/rLexer/tokenizer.rb, line 40
def set_type(idx)
  if comment_start?(idx)
    @type = :COMMENT
  elsif end_tag?(idx)
    @type = :CLOSE
  elsif doctype?(idx)
    @type = :DOCTYPE
  elsif close_tag?(current_char(idx)) or comment_end?(idx)
    @type = :DATA
  elsif open_tag?(current_char(idx))
    @type = :OPEN
  end
end
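The branch order matters: a comment opener is tested before a closing tag, which is tested before a bare close character or comment terminator, and a plain open character comes last. An illustrative sketch, valid only under the assumed Tags constants shown earlier:

t = Tokenizer.new('<!-- note --><p>hi</p>')
t.set_type(0)
t.type    # => :COMMENT, assuming START_COMMENT is '<!--' (it wins over the plain '<' check)
t.set_type(18)
t.type    # => :CLOSE, since end_tag? is tested before open_tag?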
suitable?(idx, tag)
# File lib/rLexer/tokenizer.rb, line 142
def suitable?(idx, tag)
  tag == @html.byteslice(idx, tag.length)
end
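suitable? is a fixed-width lookahead: it compares the tag string against the same number of bytes of @html starting at idx. The underlying comparison is plain String#byteslice:

'<!-- x'.byteslice(0, '<!--'.length)   # => "<!--"  (equal, so suitable? would be true)
'<div>'.byteslice(0, '<!--'.length)    # => "<div"  (not equal, so suitable? would be false)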
tag_index(html)
# File lib/rLexer/tokenizer.rb, line 111
def tag_index(html)
  (@type == :OPEN) ? Tags::OPEN_TAG.length : Tags::CLOSING_TAG.length
end
tokenize()
# File lib/rLexer/tokenizer.rb, line 13
def tokenize
  @html.each_char.with_index do |ch, idx|
    comment_end(idx)
    next if @type == :COMMENT
    if open_tag?(ch) or close_tag?(ch)
      process(idx)
    end
  end
  consume_attributes
end
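An end-to-end usage sketch: tokenize walks the input character by character, pushes [type, value] pairs via set_token, and finishes with the consume_attributes pass. Exact token values depend on the Tags constants and the quote rewriting, so the expected shape below is indicative rather than guaranteed:

tokenizer = Tokenizer.new('<ul id="menu"><li>One</li></ul>')
tokenizer.tokenize
tokenizer.tokens.each { |type, value| puts "#{type.inspect} #{value.inspect}" }
# Indicative shape: an :OPEN token per tag (followed by :ATTRIBUTES where present),
# a :DATA token per text run, and a :CLOSE token per closing tag.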
tokens()
# File lib/rLexer/tokenizer.rb, line 154
def tokens
  @tokens
end