class Meteor::Ml::Html4::ParserImpl
HTML4 parser (HTMLパーサ)
Constants
- BR_1
- BR_2
- CHECKED
- CHECKED_M
- CHECKED_R
- CONTENT
- CONTENT_TYPE
- DISABLED
- DISABLED_M
- DISABLED_R
DISABLED_M
= [' disabled ',' disiabled','DISABLED
',' DISABLED']- DISABLE_ELEMENT
DISABLE_ELEMENT
= “input|textarea|select|optgroup”- FALSE
- GET_ATTRS_MAP2
- HTTP_EQUIV
- INPUT
- KAIGYO_CODE
KAIGYO_CODE
= “r?n|r”KAIGYO_CODE
= “rn|n|r”- META
- META_S
- MULTIPLE
- MULTIPLE_M
- MULTIPLE_R
MULTIPLE_M
= [' multiple ',' multiple','MULTIPLE
',' MULTIPLE']- NBSP_2
- NBSP_3
- OPTION
- PATTERN_ESCAPE
- PATTERN_ESCAPE_CONTENT
- PATTERN_UNESCAPE
- RADIO
- READONLY
- READONLY_M
- READONLY_R
READONLY_M
= [' readonly ',' readonly','READONLY
',' READONLY']- READONLY_TYPE
READONLY_TYPE
= “text|password”- SELECT
- SELECTED
- SELECTED_M
@@pattern_option = Regexp.new(OPTION) @@pattern_selected = Regexp.new(SELECTED) @@pattern_input = Regexp.new(INPUT) @@pattern_checked = Regexp.new(CHECKED) @@pattern_radio = Regexp.new(RADIO) @@pattern_disable_element = Regexp.new(DISABLE_ELEMENT) @@pattern_disabled = Regexp.new(DISABLED) @@pattern_readonly_type = Regexp.new(READONLY_TYPE) @@pattern_textarea = Regexp.new(TEXTAREA) @@pattern_readonly = Regexp.new(READONLY) @@pattern_select = Regexp.new(SELECT) @@pattern_multiple = Regexp.new(MULTIPLE)
- SELECTED_R
SELECTED_M
= [' selected ',' selected','SELECTED
',' SELECTED']- TABLE_FOR_ESCAPE_
@@pattern_@@match_tag = Regexp.new(@@match_tag) @@pattern_@@match_tag2 = Regexp.new(@@match_tag_2)
- TABLE_FOR_ESCAPE_CONTENT_
- TEXTAREA
- TRUE
- TYPE_L
@@pattern_true = Regexp.new(TRUE) @@pattern_false = Regexp.new(FALSE)
- TYPE_U
Public Class Methods
initializer (イニシャライザ) @overload initialize @overload initialize(ps)
@param [Meteor::Parser] ps parser (パーサ)
Meteor::Core::Kernel::new
# File lib/meteor.rb, line 4160
# Initializer.
#
# @overload initialize
# @overload initialize(ps)
#   @param [Meteor::Parser] ps parser handed on to initialize_1
# @raise [ArgumentError] when the argument count matches neither ZERO nor ONE
def initialize(*args)
  super()
  @doc_type = Parser::HTML4

  case args.size
  when ZERO
    # No source parser supplied: nothing further to set up.
  when ONE
    initialize_1(args.first)
  else
    raise ArgumentError
  end
end
Public Instance Methods
get content type (コンテントタイプを取得する) @return [String] content type (コンテントタイプ)
# File lib/meteor.rb, line 4219
# Get the content type.
#
# @return [String] the content type stored on the root element
def content_type
  @root.content_type
end
parse document (ドキュメントを解析する)
# File lib/meteor.rb, line 4197
# Parse the document. Delegates all of the work to analyze_ml.
def parse
  analyze_ml
end
Private Instance Methods
analyze document, set content type (ドキュメントをパースし、コンテントタイプをセットする)
# File lib/meteor.rb, line 4226
# Look up the content-type meta element and copy its CONTENT attribute
# onto the root element; falls back to EMPTY when no such element is found.
def analyze_content_type
  # The element_N finders print a "no such element" message only while
  # @error_check is set, so switch it off for these speculative lookups.
  @error_check = false
  element_3(META_S, HTTP_EQUIV, CONTENT_TYPE)
  # Second attempt with the alternative tag-name constant.
  element_3(META, HTTP_EQUIV, CONTENT_TYPE) unless @elm_
  @error_check = true

  @root.content_type = @elm_ ? @elm_.attr(CONTENT) : EMPTY
end
analyze document, set newline code (ドキュメントをパースし、改行コードをセットする)
# File lib/meteor.rb, line 4249
# Detect which newline sequence the document uses and record it on the
# root element.
#
# NOTE(review): every candidate found in the document overwrites the
# previous one, so the LAST matching entry of KAIGYO_CODE wins. Confirm
# that the ordering of KAIGYO_CODE is chosen with that in mind.
def analyze_kaigyo_code
  KAIGYO_CODE.each do |newline|
    @root.kaigyo_code = newline if @root.document.include?(newline)
  end
end
analyze document (ドキュメントをパースする)
# File lib/meteor.rb, line 4204
# Analyze the document: determine the content type, then the newline
# code, and finally clear the last match result.
def analyze_ml
  analyze_content_type # content type
  analyze_kaigyo_code  # newline code

  @res = nil
end
Meteor::Core::Kernel#edit_attrs_
# File lib/meteor.rb, line 4533
# Set an attribute on an element (overrides Meteor::Core::Kernel#edit_attrs_).
#
# The flag attributes selected / multiple / disabled / checked / readonly,
# when applied to a matching element, are routed to edit_attrs_5 together
# with the corresponding "match" and "replace" patterns. Every other
# combination is delegated to the superclass implementation.
#
# @param elm element whose attributes are edited
# @param attr_name attribute name
# @param attr_value attribute value
def edit_attrs_(elm, attr_name, attr_value)
  flag_patterns =
    if is_match(SELECTED, attr_name) && is_match(OPTION, elm.name)
      [@@pattern_selected_m, @@pattern_selected_r]
    elsif is_match(MULTIPLE, attr_name) && is_match(SELECT, elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r]
    elsif is_match(DISABLED, attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r]
    elsif is_match(CHECKED, attr_name) && is_match(INPUT, elm.name) && is_match(RADIO, get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r]
    elsif is_match(READONLY, attr_name) &&
          (is_match(TEXTAREA, elm.name) ||
            (is_match(INPUT, elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r]
    end

  # Not one of the special flag attributes: use the generic behaviour.
  return super(elm, attr_name, attr_value) unless flag_patterns

  match_p, replace = flag_patterns
  edit_attrs_5(elm, attr_name, attr_value, match_p, replace)
end
# File lib/meteor.rb, line 4551
# Switch a flag attribute on or off inside elm.attributes.
#
# A "true" value (the literal true, or anything matching TRUE) appends
# attr_name unless match_p already finds it. A "false" value (the literal
# false, or anything matching FALSE) removes the first occurrence matched
# by `replace`. Any other value leaves the element untouched.
#
# @param elm element whose attribute string is edited
# @param attr_name attribute name to append
# @param attr_value requested state
# @param match_p pattern detecting that the flag is already present
# @param replace pattern used to strip the flag
def edit_attrs_5(elm, attr_name, attr_value, match_p, replace)
  if true.equal?(attr_value) || is_match(TRUE, attr_value)
    @res = match_p.match(elm.attributes)
    unless @res
      # Normalise the existing attribute text before appending the flag.
      elm.attributes =
        if EMPTY.eql?(elm.attributes) || EMPTY.eql?(elm.attributes.strip)
          ''
        else
          '' << SPACE << elm.attributes.strip
        end
      elm.attributes << SPACE << attr_name
    end
  elsif false.equal?(attr_value) || is_match(FALSE, attr_value)
    elm.attributes.sub!(replace, EMPTY)
  end
end
# File lib/meteor.rb, line 4573
# Forward to edit_document_2, supplying TAG_CLOSE as the second argument.
#
# @param elm element passed through to edit_document_2
def edit_document_1(elm)
  edit_document_2(elm, TAG_CLOSE)
end
get element using tag name (要素のタグ名で検索し、要素を取得する) @param [String] name tag name (タグ名) @return [Meteor::Element] element (要素)
# File lib/meteor.rb, line 4268
# Get an element by tag name.
#
# Builds a search pattern for the tag, matches it against the root
# document and, on success, lets element_without_1 / element_with_1
# populate @elm_. When nothing matches, @elm_ is cleared and, if
# @error_check is set, a NoSuchElementException message is printed.
#
# @param [String] name tag name
# @return [Meteor::Element, nil] the element found, or nil when there is no match
def element_1(name)
  @_name = Regexp.quote(name)

  if is_match(@@match_tag, name)
    # Tag matches @@match_tag: search for an element without content.
    # (Per the original author's note this is the literal form
    # "<name(|\s[^<>]*)>" -- TODO confirm against TAG_SEARCH_1_4_2.)
    @pattern_cc = '' << TAG_OPEN << @_name << TAG_SEARCH_1_4_2
    @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
    @res = @pattern.match(@root.document)

    if @res
      element_without_1(name)
    else
      puts Meteor::Exception::NoSuchElementException.new(name).message if @error_check
      @elm_ = nil
    end
  else
    # Element with content. The negative look-ahead must use the quoted
    # tag name (@_name); the previous code interpolated an undefined `tag`
    # here, unlike the equivalent patterns in element_3 and element_5.
    @pattern_cc = "<#{@_name}(|\\s[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
    @res = @pattern.match(@root.document)

    if @res
      element_with_1(name)
    else
      puts Meteor::Exception::NoSuchElementException.new(name).message if @error_check
      @elm_ = nil
    end
  end

  @elm_
end
get element using attribute(name=“value”) (属性(属性名=“属性値”)で検索し、要素を取得する) @param [String] attr_name attribute name (属性名) @param [String] attr_value attribute value (属性値) @return [Meteor::Element] element (要素)
# File lib/meteor.rb, line 4396
# Get an element by a single attribute (name="value").
#
# Finds the first tag carrying the attribute, then hands the captured tag
# name to element_3 for the actual element lookup.
#
# @param [String] attr_name attribute name
# @param [String] attr_value attribute value
# @return [Meteor::Element, nil] the element found, or nil when there is no match
def element_2(attr_name, attr_value)
  element_quote_2(attr_name, attr_value)

  @pattern_cc = "<([^<>\"]*)\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*>"
  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless @res
    if @error_check
      puts Meteor::Exception::NoSuchElementException.new(attr_name, attr_value).message
    end
    return @elm_ = nil
  end

  # Capture group 1 is the tag name of the matching element.
  element_3(@res[1], attr_name, attr_value)
  @elm_
end
get element using tag name and attribute(name=“value”) (要素のタグ名、属性(属性名=“属性値”)で検索し、要素を取得する) @param [String] name tag name (タグ名) @param [String] attr_name attribute name (属性名) @param [String] attr_value attribute value (属性値) @return [Meteor::Element] element (要素)
# File lib/meteor.rb, line 4332
# Get an element by tag name and a single attribute (name="value").
#
# @param [String] name tag name
# @param [String] attr_name attribute name
# @param [String] attr_value attribute value
# @return [Meteor::Element, nil] the element found, or nil when there is no match
def element_3(name, attr_name, attr_value)
  element_quote_3(name, attr_name, attr_value)

  # Tags matching @@match_tag are searched as elements without content.
  without_content = is_match(@@match_tag, name)

  @pattern_cc =
    if without_content
      "<#{@_name}(\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*)>"
    else
      "<#{@_name}(\\s[^<>]*#{@_attr_name}=\"#{@_attr_value}\"[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    end
  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  # Fallback search for content elements, skipped when the tag matches
  # @@match_tag_sng.
  unless without_content || @res || is_match(@@match_tag_sng, name)
    @res = element_with_3_2
  end

  if @res
    without_content ? element_without_3(name) : element_with_3_1(name)
  else
    if @error_check
      puts Meteor::Exception::NoSuchElementException.new(name, attr_name, attr_value).message
    end
    @elm_ = nil
  end

  @elm_
end
get element using attribute1,2(name=“value”) (属性1・属性2(属性名=“属性値”)で検索し、要素を取得する)
@param [String] attr_name1 attribute name1 (属性名1) @param [String] attr_value1 attribute value1 (属性値1) @param [String] attr_name2 attribute name2 (属性名2) @param [String] attr_value2 attribute value2 (属性値2) @return [Meteor::Element] element (要素)
# File lib/meteor.rb, line 4504
# Get an element by two attributes (name="value"), in either order.
#
# Finds the first tag carrying both attributes, then hands the captured
# tag name to element_5 for the actual element lookup.
#
# @param [String] attr_name1 first attribute name
# @param [String] attr_value1 first attribute value
# @param [String] attr_name2 second attribute name
# @param [String] attr_value2 second attribute value
# @return [Meteor::Element, nil] the element found, or nil when there is no match
def element_4(attr_name1, attr_value1, attr_name2, attr_value2)
  element_quote_4(attr_name1, attr_value1, attr_name2, attr_value2)

  @pattern_cc = "<([^<>\"]*)\\s([^<>]*(#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>"
  @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
  @res = @pattern.match(@root.document)

  unless @res
    if @error_check
      puts Meteor::Exception::NoSuchElementException.new(attr_name1, attr_value1, attr_name2, attr_value2).message
    end
    return @elm_ = nil
  end

  # Capture group 1 is the tag name of the matching element.
  element_5(@res[1], attr_name1, attr_value1, attr_name2, attr_value2)
  @elm_
end
get element using tag name and attribute1,2(name=“value”) (要素のタグ名と属性1・属性2(属性名=“属性値”)で検索し、要素を取得する) @param [String] name tag name (タグ名) @param [String] attr_name1 attribute name1 (属性名1) @param [String] attr_value1 attribute value1 (属性値1) @param [String] attr_name2 attribute name2 (属性名2) @param [String] attr_value2 attribute value2 (属性値2) @return [Meteor::Element] element (要素)
# File lib/meteor.rb, line 4430
# Get an element by tag name and two attributes (name="value"), where the
# attributes may appear in either order.
#
# @param [String] name tag name
# @param [String] attr_name1 first attribute name
# @param [String] attr_value1 first attribute value
# @param [String] attr_name2 second attribute name
# @param [String] attr_value2 second attribute value
# @return [Meteor::Element, nil] the element found, or nil when there is no match
def element_5(name, attr_name1, attr_value1, attr_name2, attr_value2)
  element_quote_5(name, attr_name1, attr_value1, attr_name2, attr_value2)

  if is_match(@@match_tag, name)
    # Tag matches @@match_tag: search for an element without content.
    @pattern_cc = "<#{@_name}(\\s[^<>]*(?:#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>"
    @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
    @res = @pattern.match(@root.document)

    if @res
      element_without_5(name)
    else
      if @error_check
        puts Meteor::Exception::NoSuchElementException.new(name, attr_name1, attr_value1, attr_name2, attr_value2).message
      end
      @elm_ = nil
    end
  else
    # Element with content.
    @pattern_cc = "<#{@_name}(\\s[^<>]*(?:#{@_attr_name1}=\"#{@_attr_value1}\"[^<>]*#{@_attr_name2}=\"#{@_attr_value2}\"|#{@_attr_name2}=\"#{@_attr_value2}\"[^<>]*#{@_attr_name1}=\"#{@_attr_value1}\")[^<>]*)>(((?!(#{@_name}[^<>]*>)).)*)<\\/#{@_name}>"
    @pattern = Meteor::Core::Util::PatternCache.get(@pattern_cc)
    @res = @pattern.match(@root.document)

    # Fallback search, skipped when the tag matches @@match_tag_sng. The tag
    # under test is the `name` parameter (the previous code referenced an
    # undefined `tag`), mirroring the same check in element_3.
    if !@res && !is_match(@@match_tag_sng, name)
      @res = element_with_5_2
    end

    if @res
      element_with_5_1(name)
    else
      if @error_check
        puts Meteor::Exception::NoSuchElementException.new(name, attr_name1, attr_value1, attr_name2, attr_value2).message
      end
      @elm_ = nil
    end
  end

  @elm_
end
# File lib/meteor.rb, line 4311
# Build @elm_ for an element without content from the current match (@res).
#
# @param [String] name tag name
def element_without_1(name)
  found = Meteor::Element.new(name)
  @elm_ = found

  found.attributes = @res[1]   # attribute text captured by group 1
  found.pattern = @pattern_cc  # pattern source that located the element
  found.document = @res[0]     # complete matched text
  found.parser = self
end
# File lib/meteor.rb, line 4384
# Forward to element_without_3_1, supplying TAG_SEARCH_2_4_3 as the
# second argument.
#
# @param [String] name tag name
def element_without_3(name)
  element_without_3_1(name, TAG_SEARCH_2_4_3)
end
# File lib/meteor.rb, line 4489
# Forward to element_without_5_1, supplying TAG_SEARCH_2_4_3_2 as the
# second argument.
#
# @param [String] name tag name
def element_without_5(name)
  element_without_5_1(name, TAG_SEARCH_2_4_3_2)
end
# File lib/meteor.rb, line 4682
# Replace special characters in `content` using TABLE_FOR_ESCAPE_.
# The argument itself is not modified.
#
# @param [String] content text to escape
# @return [String] a new, escaped string
def escape(content)
  content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
end
# File lib/meteor.rb, line 4689
# Replace special characters in `content` IN PLACE using
# TABLE_FOR_ESCAPE_CONTENT_.
#
# @param [String] content text to escape (mutated)
# @param elm element the content belongs to (not used by this implementation)
# @return [String] the same `content` object
def escape_content(content, elm)
  content.tap { |text| text.gsub!(@@pattern_escape_content, TABLE_FOR_ESCAPE_CONTENT_) }
end
get attribute map of element (要素の属性マップを取得する) @param [Meteor::Element] elm element (要素) @return [Meteor::AttributeMap] attribute map (属性マップ)
# File lib/meteor.rb, line 4648
# Get the attribute map of an element.
#
# @param [Meteor::Element] elm element
# @return [Meteor::AttributeMap] attribute map
def get_attr_map(elm)
  attr_map = Meteor::AttributeMap.new

  # Attributes captured as a name/value pair: store the unescaped value.
  elm.attributes.scan(@@pattern_get_attrs_map) do |key, raw_value|
    attr_map.store(key, unescape(raw_value))
  end

  # Attributes matched by the second pattern carry a single capture and
  # are stored with the value TRUE.
  elm.attributes.scan(@@pattern_get_attrs_map2) do |captures|
    attr_map.store(captures[0], TRUE)
  end

  attr_map.recordable = true
  attr_map
end
Meteor::Core::Kernel#get_attr_value_
# File lib/meteor.rb, line 4579
# Get an attribute value (overrides Meteor::Core::Kernel#get_attr_value_).
#
# The flag attributes selected / multiple / disabled / checked / readonly,
# when requested on a matching element, are answered by get_attr_value_r
# using the corresponding "match" pattern. Every other combination is
# delegated to the superclass implementation.
#
# @param elm element to inspect
# @param attr_name attribute name
def get_attr_value_(elm, attr_name)
  # Wrapped in an array so that "no special case" (nil) stays distinct
  # from whatever value the pattern variable holds.
  flag_pattern =
    if is_match(SELECTED, attr_name) && is_match(OPTION, elm.name)
      [@@pattern_selected_m]
    elsif is_match(MULTIPLE, attr_name) && is_match(SELECT, elm.name)
      [@@pattern_multiple_m]
    elsif is_match(DISABLED, attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m]
    elsif is_match(CHECKED, attr_name) && is_match(INPUT, elm.name) && is_match(RADIO, get_type(elm))
      [@@pattern_checked_m]
    elsif is_match(READONLY, attr_name) &&
          (is_match(TEXTAREA, elm.name) ||
            (is_match(INPUT, elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m]
    end

  return super(elm, attr_name) unless flag_pattern

  get_attr_value_r(elm, flag_pattern[0])
end
# File lib/meteor.rb, line 4609
# Report whether a flag attribute is present on the element.
#
# @param elm element whose attribute text is tested
# @param match_p pattern detecting the flag
# @return TRUE when match_p matches elm.attributes, FALSE otherwise
def get_attr_value_r(elm, match_p)
  @res = match_p.match(elm.attributes)
  @res ? TRUE : FALSE
end
get attribute map (属性マップを取得する) @param [Meteor::Element] elm element (要素) @return [Hash] attribute map (属性マップ)
# File lib/meteor.rb, line 4627
# Get the attributes of an element as a plain Hash.
#
# @param [Meteor::Element] elm element
# @return [Hash] attribute map
def get_attrs(elm)
  result = {}

  # Attributes captured as a name/value pair: store the unescaped value.
  elm.attributes.scan(@@pattern_get_attrs_map) do |key, raw_value|
    result.store(key, unescape(raw_value))
  end

  # Attributes matched by the second pattern carry a single capture and
  # are stored with the value TRUE.
  elm.attributes.scan(@@pattern_get_attrs_map2) do |captures|
    result.store(captures[0], TRUE)
  end

  result
end
# File lib/meteor.rb, line 4597
# Get the element's type value, looking it up on first use and caching
# it on the element.
#
# The attribute named by TYPE_L is tried first, then the one named by
# TYPE_U; a value already cached on the element is returned as is.
#
# @param elm element to inspect
# @return the cached or freshly looked-up type value (may be nil)
def get_type(elm)
  elm.type_value ||= get_attr_value_(elm, TYPE_L)
  elm.type_value ||= get_attr_value_(elm, TYPE_U)
  elm.type_value
end
initializer (イニシャライザ) @param [Meteor::Parser] ps parser (パーサ)
# File lib/meteor.rb, line 4185
# Initializer: take over the state of another parser.
#
# The document, document hook and content type are copied into new
# String objects; the newline code is assigned as is.
#
# @param [Meteor::Parser] ps parser to copy from
def initialize_1(ps)
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)

  source_content_type = ps.root_element.content_type
  @root.content_type = String.new(source_content_type)
  @root.kaigyo_code = ps.root_element.kaigyo_code
end
# File lib/meteor.rb, line 4666
# Remove the first occurrence of an attribute from elm.attributes.
#
# Attribute names matching @@attr_logic are searched by the bare name;
# all others by the name followed by ERASE_ATTR_1 (per the original
# author's note, the literal form `name="[^"]*"\s?` -- TODO confirm).
#
# @param elm element whose attribute string is edited
# @param attr_name attribute name
def remove_attrs_(elm, attr_name)
  pattern_source =
    if is_match(@@attr_logic, attr_name)
      attr_name
    else
      '' << attr_name << ERASE_ATTR_1
    end

  @pattern = Meteor::Core::Util::PatternCache.get(pattern_source)
  elm.attributes.sub!(@pattern, EMPTY)
end
# File lib/meteor.rb, line 4698
# Replace escaped special characters in `content` with their plain form.
#
# Each match of @@pattern_unescape is mapped through its first capture:
#   AND_3  -> AND_1
#   QO_3   -> DOUBLE_QUATATION
#   AP_3   -> AP_1
#   GT_3   -> GT_1
#   LT_3   -> LT_1
#   NBSP_3 -> SPACE
# (presumably the entities for & " ' > < and non-breaking space -- TODO
# confirm against the constant definitions).
#
# The substituted string is returned. Previously the result of the
# non-mutating gsub was discarded and the unchanged argument was handed
# back, so nothing was ever unescaped.
#
# @param [String] content text to unescape (not modified)
# @return [String] a new, unescaped string
def unescape(content)
  content.gsub(@@pattern_unescape) do
    case $1
    when AND_3
      AND_1
    when QO_3
      DOUBLE_QUATATION
    when AP_3
      AP_1
    when GT_3
      GT_1
    when LT_3
      LT_1
    when NBSP_3
      SPACE
    end
  end
end
# File lib/meteor.rb, line 4727
# Unescape element content.
#
# After the generic unescape, matches of @@pattern_br_2 are replaced with
# the document's newline code, but only when the element has `cx` set or
# its tag does not match @@match_tag_2, and only when the incoming content
# contains BR_2.
#
# @param [String] content content to unescape
# @param elm element the content belongs to
# @return [String] unescaped content
def unescape_content(content, elm)
  unescaped = unescape(content)

  if (elm.cx || !is_match(@@match_tag_2, elm.name)) && content.include?(BR_2)
    unescaped.gsub!(@@pattern_br_2, @root.kaigyo_code)
  end

  unescaped
end