class Meteor::Ml::Html4::ParserImpl

HTML4 parser (HTMLパーサ)

Constants

BR_1
BR_2
CHECKED
CHECKED_M
CHECKED_R

CHECKED_M = [' checked ',' checked',' CHECKED ',' CHECKED']

CONTENT
CONTENT_TYPE
DISABLED
DISABLED_M
DISABLED_R

DISABLED_M = [' disabled ',' disabled',' DISABLED ',' DISABLED']

DISABLE_ELEMENT

DISABLE_ELEMENT = “input|textarea|select|optgroup”

FALSE
GET_ATTRS_MAP2
HTTP_EQUIV
INPUT
KAIGYO_CODE

KAIGYO_CODE = “\r?\n|\r” KAIGYO_CODE = “\r\n|\n|\r”

META
META_S
MULTIPLE
MULTIPLE_M
MULTIPLE_R

MULTIPLE_M = [' multiple ',' multiple',' MULTIPLE ',' MULTIPLE']

NBSP_2
NBSP_3
OPTION
PATTERN_ESCAPE
PATTERN_ESCAPE_CONTENT
PATTERN_UNESCAPE
RADIO
READONLY
READONLY_M
READONLY_R

READONLY_M = [' readonly ',' readonly',' READONLY ',' READONLY']

READONLY_TYPE

READONLY_TYPE = “text|password”

SELECT
SELECTED
SELECTED_M

@@pattern_option = Regexp.new(OPTION) @@pattern_selected = Regexp.new(SELECTED) @@pattern_input = Regexp.new(INPUT) @@pattern_checked = Regexp.new(CHECKED) @@pattern_radio = Regexp.new(RADIO) @@pattern_disable_element = Regexp.new(DISABLE_ELEMENT) @@pattern_disabled = Regexp.new(DISABLED) @@pattern_readonly_type = Regexp.new(READONLY_TYPE) @@pattern_textarea = Regexp.new(TEXTAREA) @@pattern_readonly = Regexp.new(READONLY) @@pattern_select = Regexp.new(SELECT) @@pattern_multiple = Regexp.new(MULTIPLE)

SELECTED_R

SELECTED_M = [' selected ',' selected',' SELECTED ',' SELECTED']

TABLE_FOR_ESCAPE_

@@pattern_@@match_tag = Regexp.new(@@match_tag) @@pattern_@@match_tag2 = Regexp.new(@@match_tag_2)

TABLE_FOR_ESCAPE_CONTENT_
TEXTAREA
TRUE
TYPE_L

@@pattern_true = Regexp.new(TRUE) @@pattern_false = Regexp.new(FALSE)

TYPE_U

Public Class Methods

new(*args) click to toggle source

initializer (イニシャライザ) @overload initialize @overload initialize(ps)

@param [Meteor::Parser] ps parser (パーサ)
Calls superclass method Meteor::Core::Kernel::new
# File lib/meteor.rb, line 4160
#
# Initializer. Takes either no argument or a single parser.
# @overload initialize
# @overload initialize(ps)
#   @param [Meteor::Parser] ps parser handed on to initialize_1
# @raise [ArgumentError] when more than one argument is passed
#
def initialize(*args)
  super()
  @doc_type = Parser::HTML4

  case args.length
  when ZERO then nil # no-argument form: nothing more to set up
  when ONE then initialize_1(args.first)
  else raise ArgumentError
  end
end

Public Instance Methods

content_type() click to toggle source

get content type (コンテントタイプを取得する) @return [String] content type (コンテントタイプ)

# File lib/meteor.rb, line 4219
#
# Get the content type stored on the root element.
# @return [String] content type
#
def content_type
  root = @root
  root.content_type
end
parse() click to toggle source

parse document (ドキュメントを解析する)

# File lib/meteor.rb, line 4197
#
# Parse the document by delegating to analyze_ml.
# @return whatever analyze_ml returns
#
def parse
  outcome = analyze_ml
  outcome
end

Private Instance Methods

analyze_content_type() click to toggle source

analyze document, set content type (ドキュメントをパースし、コンテントタイプをセットする)

# File lib/meteor.rb, line 4226
#
# Look up the content-type meta element and copy its CONTENT attribute
# to the root element (EMPTY when no such element is found).
#
def analyze_content_type
  # element_3 only prints a "no such element" message while @error_check is
  # set, so it is switched off for the duration of the two probes.
  @error_check = false
  element_3(META_S, HTTP_EQUIV, CONTENT_TYPE)
  # second attempt with the other meta tag-name constant
  element_3(META, HTTP_EQUIV, CONTENT_TYPE) unless @elm_
  @error_check = true

  @root.content_type = @elm_ ? @elm_.attr(CONTENT) : EMPTY
end
analyze_kaigyo_code() click to toggle source

analyze document, set newline code (ドキュメントをパースし、改行コードをセットする)

# File lib/meteor.rb, line 4249
#
# Detect the newline code used by the document and store it on the root element.
#
def analyze_kaigyo_code
  # Every candidate in KAIGYO_CODE is tried; each hit overwrites the previous
  # one, so the last candidate found in the document is the one that is kept.
  # NOTE(review): there is no early exit. If KAIGYO_CODE lists "\r\n" ahead of
  # "\n" / "\r", a CRLF document ends up with the later entry -- confirm intended.
  KAIGYO_CODE.each do |code|
    @root.kaigyo_code = code if @root.document.include?(code)
  end
end
analyze_ml() click to toggle source

analyze document (ドキュメントをパースする)

# File lib/meteor.rb, line 4204
#
# Analyze the document: determine the content type, then the newline code,
# and finally clear the last match result.
#
def analyze_ml
  analyze_content_type # obtain the content type
  analyze_kaigyo_code  # obtain the newline code

  @res = nil
end
edit_attrs_(elm, attr_name, attr_value) click to toggle source
Calls superclass method Meteor::Core::Kernel#edit_attrs_
# File lib/meteor.rb, line 4533
#
# Set an attribute on an element. The attribute/element combinations tested
# below are routed to edit_attrs_5 with their match and replace patterns;
# anything else is handled by the superclass method.
# @param [Meteor::Element] elm element
# @param [String] attr_name attribute name
# @param attr_value attribute value
#
def edit_attrs_(elm, attr_name, attr_value)
  # [match pattern, replace pattern] for the first combination that applies
  flag_patterns =
    if is_match(SELECTED, attr_name) && is_match(OPTION, elm.name)
      [@@pattern_selected_m, @@pattern_selected_r]
    elsif is_match(MULTIPLE, attr_name) && is_match(SELECT, elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r]
    elsif is_match(DISABLED, attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r]
    elsif is_match(CHECKED, attr_name) && is_match(INPUT, elm.name) && is_match(RADIO, get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r]
    elsif is_match(READONLY, attr_name) && (is_match(TEXTAREA, elm.name) || (is_match(INPUT, elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r]
    end

  return super(elm, attr_name, attr_value) unless flag_patterns

  edit_attrs_5(elm, attr_name, attr_value, *flag_patterns)
end
edit_attrs_5(elm, attr_name, attr_value, match_p, replace) click to toggle source
# File lib/meteor.rb, line 4551
#
# Switch a value-less attribute on or off in the element's attribute string.
# @param [Meteor::Element] elm element
# @param [String] attr_name attribute name appended when switching on
# @param attr_value true / a TRUE-matching value switches on,
#   false / a FALSE-matching value switches off, anything else is ignored
# @param [Regexp] match_p pattern that detects the attribute in elm.attributes
# @param [Regexp] replace pattern removed from elm.attributes when switching off
#
def edit_attrs_5(elm, attr_name, attr_value, match_p, replace)
  if true.equal?(attr_value) || is_match(TRUE, attr_value)
    @res = match_p.match(elm.attributes)
    # already present: leave the attribute string alone
    return if @res

    # normalise what is there to "<SPACE><stripped attributes>" (or an empty
    # string when blank) before appending the attribute name
    elm.attributes =
      if EMPTY.eql?(elm.attributes) || EMPTY.eql?(elm.attributes.strip)
        ''
      else
        '' << SPACE << elm.attributes.strip
      end
    elm.attributes << SPACE << attr_name
  elsif false.equal?(attr_value) || is_match(FALSE, attr_value)
    elm.attributes.sub!(replace, EMPTY)
  end
end
edit_document_1(elm) click to toggle source
# File lib/meteor.rb, line 4573
#
# Reflect an element into the document by delegating to edit_document_2
# with TAG_CLOSE as the second argument.
# @param [Meteor::Element] elm element
#
def edit_document_1(elm)
  closer = TAG_CLOSE
  edit_document_2(elm, closer)
end
element_1(name) click to toggle source

get element using tag name (要素のタグ名で検索し、要素を取得する) @param [String] name tag name (タグ名) @return [Meteor::Element] element (要素)

# File lib/meteor.rb, line 4268
#
# Get an element by tag name.
# The match result is kept in @res; @elm_ is set to nil when nothing matches.
# @param [String] name tag name
# @return [Meteor::Element] element (nil when no element matches)
#
def element_1(name)
  @_name = Regexp.quote(name)

  # empty element vs. element with content
  void_tag = is_match(@@match_tag, name)

  @pattern_cc =
    if void_tag
      # search pattern for an empty element
      '' << TAG_OPEN << @_name << TAG_SEARCH_1_4_2
    else
      # search pattern for an element with content; every part of it,
      # including the negative lookahead, is built from the quoted tag name
      "<#{@_name}(|\\s[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    end

  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  if @res
    void_tag ? element_without_1(name) : element_with_1(name)
  else
    # the miss is reported only while error checking is switched on
    puts Meteor::Exception::NoSuchElementException.new(name).message if @error_check
    @elm_ = nil
  end

  @elm_
end
element_2(attr_name, attr_value) click to toggle source

get element using attribute(name=“value”) (属性(属性名=“属性値”)で検索し、要素を取得する) @param [String] attr_name attribute name (属性名) @param [String] attr_value attribute value (属性値) @return [Meteor::Element] element (要素)

# File lib/meteor.rb, line 4396
#
# Get an element by attribute (name="value").
# The tag name captured by the search is passed on to element_3.
# @param [String] attr_name attribute name
# @param [String] attr_value attribute value
# @return [Meteor::Element] element (nil when no element matches)
#
def element_2(attr_name, attr_value)
  element_quote_2(attr_name, attr_value)

  # group 1 captures the tag name of the first tag carrying the attribute
  @pattern_cc = "<([^<>\"]*)\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*>"
  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless @res
    # the miss is reported only while error checking is switched on
    puts Meteor::Exception::NoSuchElementException.new(attr_name, attr_value).message if @error_check
    @elm_ = nil
    return @elm_
  end

  element_3(@res[1], attr_name, attr_value)
  @elm_
end
element_3(name, attr_name, attr_value) click to toggle source

get element using tag name and attribute(name=“value”) (要素のタグ名、属性(属性名=“属性値”)で検索し、要素を取得する) @param [String] name tag name (タグ名) @param [String] attr_name attribute name (属性名) @param [String] attr_value attribute value (属性値) @return [Meteor::Element] element (要素)

# File lib/meteor.rb, line 4332
#
# Get an element by tag name and attribute (name="value").
# The match result is kept in @res; @elm_ is set to nil when nothing matches.
# @param [String] name tag name
# @param [String] attr_name attribute name
# @param [String] attr_value attribute value
# @return [Meteor::Element] element (nil when no element matches)
#
def element_3(name, attr_name, attr_value)
  element_quote_3(name, attr_name, attr_value)

  # empty element vs. element with content
  void_tag = is_match(@@match_tag, name)

  @pattern_cc =
    if void_tag
      # search pattern for an empty element
      "<#{@_name}(\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*)>"
    else
      # search pattern for an element with content
      "<#{@_name}(\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    end

  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless void_tag
    # fall back to element_with_3_2 when the simple pattern missed and the
    # tag name does not match @@match_tag_sng
    @res = element_with_3_2 unless @res || is_match(@@match_tag_sng, name)
  end

  if @res
    void_tag ? element_without_3(name) : element_with_3_1(name)
  else
    # the miss is reported only while error checking is switched on
    puts Meteor::Exception::NoSuchElementException.new(name, attr_name, attr_value).message if @error_check
    @elm_ = nil
  end

  @elm_
end
element_4(attr_name1, attr_value1, attr_name2, attr_value2) click to toggle source

get element using attribute1,2(name=“value”) (属性1・属性2(属性名=“属性値”)で検索し、要素を取得する)

@param [String] attr_name1 attribute name1 (属性名1) @param [String] attr_value1 attribute value1 (属性値1) @param [String] attr_name2 attribute name2 (属性名2) @param [String] attr_value2 attribute value2 (属性値2) @return [Meteor::Element] element (要素)

# File lib/meteor.rb, line 4504
#
# Get an element by two attributes (name="value"), in either order.
# The tag name captured by the search is passed on to element_5.
# @param [String] attr_name1 attribute name 1
# @param [String] attr_value1 attribute value 1
# @param [String] attr_name2 attribute name 2
# @param [String] attr_value2 attribute value 2
# @return [Meteor::Element] element (nil when no element matches)
#
def element_4(attr_name1, attr_value1, attr_name2, attr_value2)
  element_quote_4(attr_name1, attr_value1, attr_name2, attr_value2)

  # group 1 captures the tag name; the alternation accepts both attribute orders
  @pattern_cc = "<([^<>\"]*)\\s([^<>]*(#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>"
  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless @res
    # the miss is reported only while error checking is switched on
    puts Meteor::Exception::NoSuchElementException.new(attr_name1, attr_value1, attr_name2, attr_value2).message if @error_check
    @elm_ = nil
    return @elm_
  end

  element_5(@res[1], attr_name1, attr_value1, attr_name2, attr_value2)
  @elm_
end
element_5(name, attr_name1, attr_value1, attr_name2, attr_value2) click to toggle source

get element using tag name and attribute1,2(name=“value”) (要素のタグ名と属性1・属性2(属性名=“属性値”)で検索し、要素を取得する) @param [String] name tag name (タグ名) @param [String] attr_name1 attribute name1 (属性名1) @param [String] attr_value1 attribute value1 (属性値1) @param [String] attr_name2 attribute name2 (属性名2) @param [String] attr_value2 attribute value2 (属性値2) @return [Meteor::Element] element (要素)

# File lib/meteor.rb, line 4430
#
# Get an element by tag name and two attributes (name="value"), in either order.
# The match result is kept in @res; @elm_ is set to nil when nothing matches.
# @param [String] name tag name
# @param [String] attr_name1 attribute name 1
# @param [String] attr_value1 attribute value 1
# @param [String] attr_name2 attribute name 2
# @param [String] attr_value2 attribute value 2
# @return [Meteor::Element] element (nil when no element matches)
#
def element_5(name, attr_name1, attr_value1, attr_name2, attr_value2)
  element_quote_5(name, attr_name1, attr_value1, attr_name2, attr_value2)

  # empty element vs. element with content
  void_tag = is_match(@@match_tag, name)

  @pattern_cc =
    if void_tag
      # search pattern for an empty element
      "<#{@_name}(\\s[^<>]*(?:#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>"
    else
      # search pattern for an element with content
      "<#{@_name}(\\s[^<>]*(?:#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    end

  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless void_tag
    # fall back to element_with_5_2 when the simple pattern missed and the
    # tag name (the `name` argument) does not match @@match_tag_sng
    @res = element_with_5_2 unless @res || is_match(@@match_tag_sng, name)
  end

  if @res
    void_tag ? element_without_5(name) : element_with_5_1(name)
  else
    # the miss is reported only while error checking is switched on
    puts Meteor::Exception::NoSuchElementException.new(name, attr_name1, attr_value1, attr_name2, attr_value2).message if @error_check
    @elm_ = nil
  end

  @elm_
end
element_without_1(name) click to toggle source
# File lib/meteor.rb, line 4311
#
# Build @elm_ for an empty element from the current match (@res).
# @param [String] name tag name
#
def element_without_1(name)
  elm = Meteor::Element.new(name)
  @elm_ = elm

  elm.attributes = @res[1]   # attribute string: first capture group
  elm.pattern = @pattern_cc  # the search pattern that found the empty element
  elm.document = @res[0]     # the whole matched text
  elm.parser = self
end
element_without_3(name) click to toggle source
# File lib/meteor.rb, line 4384
#
# Delegate to element_without_3_1 with TAG_SEARCH_2_4_3 as the second argument.
# @param [String] name tag name
#
def element_without_3(name)
  closer = TAG_SEARCH_2_4_3
  element_without_3_1(name, closer)
end
element_without_5(name) click to toggle source
# File lib/meteor.rb, line 4489
#
# Delegate to element_without_5_1 with TAG_SEARCH_2_4_3_2 as the second argument.
# @param [String] name tag name
#
def element_without_5(name)
  closer = TAG_SEARCH_2_4_3_2
  element_without_5_1(name, closer)
end
escape(content) click to toggle source
# File lib/meteor.rb, line 4682
#
# Replace special characters using @@pattern_escape and TABLE_FOR_ESCAPE_.
# The argument itself is left untouched.
# @param [String] content text to escape
# @return [String] new string with the replacements applied
#
def escape(content)
  content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
end
escape_content(content, elm) click to toggle source
# File lib/meteor.rb, line 4689
#
# Replace special characters in element content, in place, using
# @@pattern_escape_content and TABLE_FOR_ESCAPE_CONTENT_.
# @param [String] content text to escape (modified in place)
# @param [Meteor::Element] elm element (not used by this implementation)
# @return [String] the same string object that was passed in
#
def escape_content(content, elm)
  content.tap do |text|
    text.gsub!(@@pattern_escape_content, TABLE_FOR_ESCAPE_CONTENT_)
  end
end
get_attr_map(elm) click to toggle source

get attribute map of element (要素の属性マップを取得する) @param [Meteor::Element] elm element (要素) @return [Meteor::AttributeMap] attribute map (属性マップ)

# File lib/meteor.rb, line 4648
#
# Get the attribute map of an element.
# @param [Meteor::Element] elm element
# @return [Meteor::AttributeMap] attribute map, marked recordable
#
def get_attr_map(elm)
  map = Meteor::AttributeMap.new

  # name/value pairs: the value is stored after passing through unescape
  elm.attributes.scan(@@pattern_get_attrs_map) do |attr_name, raw_value|
    map.store(attr_name, unescape(raw_value))
  end

  # second pattern: only a name is taken from each hit and it is stored as TRUE
  elm.attributes.scan(@@pattern_get_attrs_map2) do |hit|
    map.store(hit[0], TRUE)
  end

  map.recordable = true
  map
end
get_attr_value_(elm, attr_name) click to toggle source
Calls superclass method Meteor::Core::Kernel#get_attr_value_
# File lib/meteor.rb, line 4579
#
# Get an attribute value. The attribute/element combinations tested below are
# answered by get_attr_value_r with the corresponding match pattern; anything
# else is handled by the superclass method.
# @param [Meteor::Element] elm element
# @param [String] attr_name attribute name
#
def get_attr_value_(elm, attr_name)
  if is_match(SELECTED, attr_name) && is_match(OPTION, elm.name)
    return get_attr_value_r(elm, @@pattern_selected_m)
  end

  if is_match(MULTIPLE, attr_name) && is_match(SELECT, elm.name)
    return get_attr_value_r(elm, @@pattern_multiple_m)
  end

  if is_match(DISABLED, attr_name) && is_match(DISABLE_ELEMENT, elm.name)
    return get_attr_value_r(elm, @@pattern_disabled_m)
  end

  if is_match(CHECKED, attr_name) && is_match(INPUT, elm.name) && is_match(RADIO, get_type(elm))
    return get_attr_value_r(elm, @@pattern_checked_m)
  end

  if is_match(READONLY, attr_name) && (is_match(TEXTAREA, elm.name) || (is_match(INPUT, elm.name) && is_match(READONLY_TYPE, get_type(elm))))
    return get_attr_value_r(elm, @@pattern_readonly_m)
  end

  super(elm, attr_name)
end
get_attr_value_r(elm, match_p) click to toggle source
# File lib/meteor.rb, line 4609
#
# Report whether match_p is found in the element's attribute string.
# The match result is kept in @res.
# @param [Meteor::Element] elm element
# @param [Regexp] match_p pattern to look for
# @return TRUE when the pattern matches, FALSE otherwise
#
def get_attr_value_r(elm, match_p)
  @res = match_p.match(elm.attributes)
  @res ? TRUE : FALSE
end
get_attrs(elm) click to toggle source

get attribute map (属性マップを取得する) @param [Meteor::Element] elm element (要素) @return [Hash] attribute map (属性マップ)

# File lib/meteor.rb, line 4627
#
# Get the attributes of an element as a Hash.
# @param [Meteor::Element] elm element
# @return [Hash] attribute map
#
def get_attrs(elm)
  result = {}

  # name/value pairs: the value is stored after passing through unescape
  elm.attributes.scan(@@pattern_get_attrs_map) do |attr_name, raw_value|
    result.store(attr_name, unescape(raw_value))
  end

  # second pattern: only a name is taken from each hit and it is stored as TRUE
  elm.attributes.scan(@@pattern_get_attrs_map2) do |hit|
    result.store(hit[0], TRUE)
  end

  result
end
get_type(elm) click to toggle source
# File lib/meteor.rb, line 4597
#
# Get the element's type value, looking it up and caching it on the element
# when it is not set yet (TYPE_L is tried first, then TYPE_U).
# @param [Meteor::Element] elm element
# @return the cached or freshly looked-up type value
#
def get_type(elm)
  return elm.type_value if elm.type_value

  elm.type_value = get_attr_value_(elm, TYPE_L)
  elm.type_value = get_attr_value_(elm, TYPE_U) unless elm.type_value
  elm.type_value
end
initialize_1(ps) click to toggle source

initializer (イニシャライザ) @param [Meteor::Parser] ps parser (パーサ)

# File lib/meteor.rb, line 4185
#
# Initializer that takes its state from another parser.
# The document, document hook and content type are copied into new String
# objects; the newline code is assigned as-is.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)

  source_root = ps.root_element
  @root.content_type = String.new(source_root.content_type)
  @root.kaigyo_code = source_root.kaigyo_code
end
remove_attrs_(elm, attr_name) click to toggle source
# File lib/meteor.rb, line 4666
#
# Remove the first occurrence of an attribute from the element's attribute string.
# @param [Meteor::Element] elm element
# @param [String] attr_name attribute name
#
def remove_attrs_(elm, attr_name)
  # Names matching @@attr_logic (logical attributes) are searched for by name
  # alone; every other name gets ERASE_ATTR_1 appended to form the pattern.
  pattern_source =
    if is_match(@@attr_logic, attr_name)
      attr_name
    else
      '' << attr_name << ERASE_ATTR_1
    end

  @pattern = Meteor::Core::Util::PatternCache.get(pattern_source)
  elm.attributes.sub!(@pattern, EMPTY)
end
unescape(content) click to toggle source
# File lib/meteor.rb, line 4698
#
# Replace escaped special characters with their plain counterparts.
# The argument is not modified; the unescaped text is returned as a new string.
#
# Replacements, keyed on the first capture group of @@pattern_unescape:
#   AND_3  -> AND_1              ("&amp;"  -> "&")
#   QO_3   -> DOUBLE_QUATATION   ("&quot;" -> '"')
#   AP_3   -> AP_1
#   GT_3   -> GT_1               ("&gt;"   -> ">")
#   LT_3   -> LT_1               ("&lt;"   -> "<")
#   NBSP_3 -> SPACE              ("&nbsp;" -> " ")
#
# @param [String] content text to unescape
# @return [String] unescaped text
#
def unescape(content)
  # String#gsub returns a new string, so its result is the return value
  content.gsub(@@pattern_unescape) do
    case Regexp.last_match(1)
    when AND_3 then AND_1
    when QO_3 then DOUBLE_QUATATION
    when AP_3 then AP_1
    when GT_3 then GT_1
    when LT_3 then LT_1
    when NBSP_3 then SPACE
    else
      # anything the table does not know is left exactly as it was matched
      Regexp.last_match(0)
    end
  end
end
unescape_content(content, elm) click to toggle source
# File lib/meteor.rb, line 4727
#
# Unescape element content and, where applicable, turn line-break tags back
# into the document's newline code.
# @param [String] content element content
# @param [Meteor::Element] elm element the content belongs to
# @return [String] the string returned by unescape, after any replacement
#
def unescape_content(content, elm)
  unescaped = unescape(content)

  # Line breaks are restored only when elm.cx is set or the tag name does not
  # match @@match_tag_2, and only when the content contains BR_2.
  restore_breaks = elm.cx || !is_match(@@match_tag_2, elm.name)
  if restore_breaks && content.include?(BR_2)
    # line-break tag -> newline code of the document
    unescaped.gsub!(@@pattern_br_2, @root.kaigyo_code)
  end

  unescaped
end