module Nicos::Parser::Xml
Public Class Methods
getThumbInfo(xml)
click to toggle source
getThumbInfoが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 120
#
# Parses the XML returned by getThumbInfo and returns its fields as one
# flat Hash.
#
# Every element whose name is listed in the +case+ below is handed to
# parseRow together with a type tag describing how its content should be
# converted; the one-entry Hash that parseRow returns is merged into the
# result. Elements with any other name are ignored.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Hash] merged rows returned by parseRow
def getThumbInfo(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  parsed = {}

  begin
    while doc.read
      # Closing tags carry no data of their own.
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      row =
        case doc.name
        when "video_id"       then parseRow(:video_id,       :String,  doc)
        when "title"          then parseRow(:title,          :String,  doc)
        when "description"    then parseRow(:description,    :String,  doc)
        when "thumbnail_url"  then parseRow(:thumbnail_url,  :String,  doc)
        when "movie_type"     then parseRow(:movie_type,     :String,  doc)
        when "last_res_body"  then parseRow(:last_res_body,  :String,  doc)
        when "watch_url"      then parseRow(:watch_url,      :String,  doc)
        when "thumb_type"     then parseRow(:thumb_type,     :String,  doc)
        when "size_high"      then parseRow(:size_high,      :Fixnum,  doc)
        when "size_low"       then parseRow(:size_low,       :Fixnum,  doc)
        when "view_counter"   then parseRow(:view_counter,   :Fixnum,  doc)
        when "comment_num"    then parseRow(:comment_num,    :Fixnum,  doc)
        when "mylist_counter" then parseRow(:mylist_counter, :Fixnum,  doc)
        when "embeddable"     then parseRow(:embeddable,     :Fixnum,  doc)
        when "no_live_play"   then parseRow(:no_live_play,   :Fixnum,  doc)
        when "user_id"        then parseRow(:user_id,        :Fixnum,  doc)
        when "first_retrieve" then parseRow(:first_retrieve, :ISO8601, doc)
        when "length"         then parseRow(:length,         :Time,    doc)
        when "tags"           then parseRow(:tags,           :Tags,    doc)
        # NOTE(review): parseRow has no :Tag branch, so this row is
        # { :tag => nil }. Kept as-is for backward compatibility.
        when "tag"            then parseRow(:tag,            :Tag,     doc)
        end

      parsed.update(row) unless row.nil?
    end
  ensure
    # Release the reader even when a row conversion raises.
    doc.close
  end

  parsed
end
mylistAtom(xml)
click to toggle source
マイリストのAtomフィードが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 231
#
# Parses the XML of a mylist Atom feed and returns it as a Hash.
#
# The document is read in two phases: everything before the first
# <entry> node fills +:mylist+, everything after it fills +:entry+.
# <title> and <id> occur both at feed level and inside each entry, which
# is why the phase decides where their values are stored.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Hash] <tt>{ :mylist => Hash, :entry => Array<Hash> }</tt>;
#   +:entry+ always holds at least one (possibly empty) Hash
def mylistAtom(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  # Index of the entry currently being filled. The first <entry> node is
  # consumed by the +break+ of phase 1, so slot 0 is pre-allocated.
  n = 0
  parsed = { :mylist => {}, :entry => [{}] }

  begin
    # Phase 1: feed-level metadata, up to the first <entry>.
    while doc.read
      break if doc.name === "entry"
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      row =
        case doc.name
        when "title"
          # Keep only the part of the title between the fixed prefix and
          # suffix; nil when the title does not have that form.
          title = parseRow(:title, :String, doc)[:title]
          found = /(マイリスト　)(.+)(‐ニコニコ動画)/.match(title)
          { :title => found && found[2] }
        when "id"       then parseRow(:mylist_id,   :mylistId, doc)
        when "subtitle" then parseRow(:description, :String,   doc)
        when "updated"  then parseRow(:updated,     :ISO8601,  doc)
        when "name"     then parseRow(:author,      :String,   doc)
        end

      parsed[:mylist].update(row) unless row.nil?
    end

    # Phase 2: the entries themselves.
    while doc.read
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      # Every further <entry> start tag opens a new slot.
      if doc.name === "entry"
        n += 1
        parsed[:entry][n] = {}
      end

      row =
        case doc.name
        when "title" then parseRow(:title,    :String,  doc)
        when "link"  then parseRow(:video_id, :videoId, doc)
        when "id"    then parseRow(:item_id,  :itemId,  doc)
        when "content"
          # The entry body is an HTML fragment; the fields are picked out
          # of it by class name.
          doc.read
          html = doc.value
          # Second capture group of +pattern+ in the fragment, or nil.
          grab = lambda do |pattern|
            found = pattern.match(html)
            found && found[2]
          end

          {
            :memo           => grab.call(/(<p\sclass=\"nico-memo\"\>)([^\<]{1,})/),
            # Accepts https as well as http thumbnail URLs.
            :thumbnail_url  => grab.call(/(<p\sclass=\"nico-thumbnail\">.+src=\")(https?:\/\/[^\"]{1,})/),
            :description    => grab.call(/(<p\sclass=\"nico-description\"\>)([^\<]{1,})/),
            :length         => Nicos::Converter.toSeconds(
              grab.call(/(<strong\sclass\=\"nico-info-length\"\>)([^\<]{1,})/)
            ),
            :first_retrieve => Nicos::Converter.japToUnix(
              grab.call(/(<strong\sclass\=\"nico-info-date\"\>)([^\<]{1,})/)
            ),
            :view           => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-view\"\>)([^\<]{1,})/)
            ),
            :res            => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-res\"\>)([^\<]{1,})/)
            ),
            :mylist         => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-mylist\"\>)([^\<]{1,})/)
            )
          }
        end

      parsed[:entry][n].update(row) unless row.nil?
    end
  ensure
    # Release the reader even when a row conversion raises.
    doc.close
  end

  parsed
end
parseRow(symbol, type, doc)
click to toggle source
# File lib/classes/parser.rb, line 13
#
# Converts the content of the node +doc+ is positioned on, according to
# +type+, and returns it as a one-entry Hash.
#
# For the text-based types the reader is advanced once and the value of
# the node it lands on is converted. A self-closing element (<foo/>) has
# no content node of its own: advancing would land on the next sibling,
# which the caller's own +read+ would then skip, so such elements are
# treated as having a nil value and the reader is left where it is.
#
# @param symbol [Symbol] key of the returned Hash; ignored for +:Tags+,
#   where the key is derived from the "domain" attribute
#   (+:tags_jp+, +:tags_tw+, +:tags_de+, +:tags_es+, or nil for any
#   other domain)
# @param type [Symbol] one of +:Fixnum+, +:String+, +:ISO8601+,
#   +:JapDate+, +:Time+, +:mylistId+, +:itemId+, +:videoId+, +:Tags+;
#   any other value yields a nil value without touching the reader
# @param doc [XML::Reader] reader positioned on the element to convert
# @return [Hash] <tt>{ key => converted value }</tt>
def parseRow(symbol, type, doc)
  # respond_to? keeps reader-like objects without empty_element? working.
  empty = doc.respond_to?(:empty_element?) && doc.empty_element?

  # Advances to the content node and returns its raw value (nil for a
  # self-closing element).
  text = lambda do
    if empty
      nil
    else
      doc.read
      doc.value
    end
  end

  value =
    case type
    # common
    when :Fixnum   then text.call.to_i
    when :String   then text.call
    when :ISO8601  then Nicos::Converter.iso8601ToUnix(text.call)
    when :JapDate  then Nicos::Converter.japToUnix(text.call)
    when :Time     then Nicos::Converter.toSeconds(text.call)
    # for Mylist Atom
    when :mylistId then Nicos::Extractor.mylistId(text.call)
    when :itemId   then Nicos::Extractor.itemId(text.call)
    when :videoId
      doc.move_to_attribute("href")
      Nicos::Extractor.videoId(doc.value)
    # for getThumbInfo
    when :Tags
      doc.move_to_attribute("domain")
      symbol =
        case doc.value
        when "jp" then :tags_jp
        when "tw" then :tags_tw
        when "de" then :tags_de
        when "es" then :tags_es
        end

      tags = []
      category = false
      locked = false
      prev = nil

      # A self-closing <tags/> has no children to walk.
      until empty || !doc.read
        # NOTE(review): this tests END_ENTITY, not END_ELEMENT. End
        # elements must reach the state machine below (it looks for node
        # type 15), so the check is kept exactly as it was.
        next if doc.node_type == XML::Reader::TYPE_END_ENTITY

        # Detect the end of the tag list. There should be a better, less
        # environment-dependent way to do this.
        break if doc.name == "tags"

        # A finished tag resets the flags for the next one.
        if prev == :end
          category = false
          locked = false
        end

        # If the attribute exists the reader moves onto it, which the
        # node name then reveals.
        doc.move_to_attribute("category")
        category = true if doc.name == "category"
        doc.move_to_attribute("lock")
        locked = true if doc.name == "lock"

        # Tell apart the start, the value and the end of a node.
        # For example <tag> and <tag lock="1"/> are both reported as '2',
        # which makes start and end hard to distinguish.
        # http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
        # (1 = element, 2 = attribute, 3 = text, 15 = end element)
        nt = doc.node_type
        now =
          if (nt == 2 || nt == 1) && prev != :val then :start
          elsif (nt == 2 || nt == 15) && prev == :val then :end
          elsif nt == 3 then :val
          end

        val = doc.read_outer_xml

        if now == :val
          obj = { :name => val }
          obj[:locked] = true if locked == true
          obj[:category] = true if category == true
          tags.push(obj)
        end

        prev = now
      end

      tags
    end

  { symbol => value }
end
tagAtom(xml)
click to toggle source
タグ検索のAtomフィードが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 167
#
# Parses the XML of a tag-search Atom feed and returns one Hash per
# <entry>.
#
# Keys are mixed for historical reasons and are kept that way for
# callers: +:title+, +:video_id+, +:thumbnail_url+, +:description+,
# +:length+ and +:first_retrieve+ are Symbols, while "published",
# "updated", "view", "res" and "mylist" are Strings.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Array<Hash>] one Hash per entry; when the feed has no entry
#   the Array holds a single Hash with whatever feed-level values matched
def tagAtom(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  # Until the first <entry> is seen n is -1, so feed-level <title>,
  # <link>, ... land in parsed[-1], i.e. the placeholder Hash, which is
  # replaced as soon as the first entry starts.
  n = -1
  parsed = [{}]

  begin
    while doc.read
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      case doc.name
      when "entry"
        n += 1
        parsed[n] = {}
      when "title"
        doc.read
        parsed[n][:title] = doc.value
      when "link"
        doc.move_to_attribute("href")
        # Fifth "/"-separated piece of the href.
        parsed[n][:video_id] = doc.value.split('/')[4]
      when "published", "updated"
        label = doc.name
        doc.read
        parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
      when "p"
        doc.move_to_attribute("class")
        case doc.value
        when "nico-thumbnail"
          # The URL is the src attribute of the node following <p>.
          doc.read
          doc.move_to_attribute("src")
          parsed[n][:thumbnail_url] = doc.value
        when "nico-description"
          doc.read
          parsed[n][:description] = doc.value
        end
      when "strong"
        doc.move_to_attribute("class")
        case doc.value
        when "nico-info-length"
          doc.read
          parsed[n][:length] = Nicos::Converter.toSeconds(doc.value)
        when "nico-info-date"
          doc.read
          parsed[n][:first_retrieve] = Nicos::Converter.japToUnix(doc.value)
        when "nico-numbers-view", "nico-numbers-res", "nico-numbers-mylist"
          # Dropping the 13-character "nico-numbers-" prefix leaves
          # "view", "res" or "mylist" as the key.
          label = doc.value
          doc.read
          parsed[n][label.slice(13, 99)] = Nicos::Converter.commaRemover(doc.value)
        end
      end
    end
  ensure
    # Release the reader even when a conversion raises.
    doc.close
  end

  parsed
end
Public Instance Methods
parseTag()
click to toggle source
# File lib/classes/parser.rb, line 114 def parseTag end
Private Instance Methods
getThumbInfo(xml)
click to toggle source
getThumbInfoが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 120
#
# Parses the XML returned by getThumbInfo and returns its fields as one
# flat Hash.
#
# Every element whose name is listed in the +case+ below is handed to
# parseRow together with a type tag describing how its content should be
# converted; the one-entry Hash that parseRow returns is merged into the
# result. Elements with any other name are ignored.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Hash] merged rows returned by parseRow
def getThumbInfo(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  parsed = {}

  begin
    while doc.read
      # Closing tags carry no data of their own.
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      row =
        case doc.name
        when "video_id"       then parseRow(:video_id,       :String,  doc)
        when "title"          then parseRow(:title,          :String,  doc)
        when "description"    then parseRow(:description,    :String,  doc)
        when "thumbnail_url"  then parseRow(:thumbnail_url,  :String,  doc)
        when "movie_type"     then parseRow(:movie_type,     :String,  doc)
        when "last_res_body"  then parseRow(:last_res_body,  :String,  doc)
        when "watch_url"      then parseRow(:watch_url,      :String,  doc)
        when "thumb_type"     then parseRow(:thumb_type,     :String,  doc)
        when "size_high"      then parseRow(:size_high,      :Fixnum,  doc)
        when "size_low"       then parseRow(:size_low,       :Fixnum,  doc)
        when "view_counter"   then parseRow(:view_counter,   :Fixnum,  doc)
        when "comment_num"    then parseRow(:comment_num,    :Fixnum,  doc)
        when "mylist_counter" then parseRow(:mylist_counter, :Fixnum,  doc)
        when "embeddable"     then parseRow(:embeddable,     :Fixnum,  doc)
        when "no_live_play"   then parseRow(:no_live_play,   :Fixnum,  doc)
        when "user_id"        then parseRow(:user_id,        :Fixnum,  doc)
        when "first_retrieve" then parseRow(:first_retrieve, :ISO8601, doc)
        when "length"         then parseRow(:length,         :Time,    doc)
        when "tags"           then parseRow(:tags,           :Tags,    doc)
        # NOTE(review): parseRow has no :Tag branch, so this row is
        # { :tag => nil }. Kept as-is for backward compatibility.
        when "tag"            then parseRow(:tag,            :Tag,     doc)
        end

      parsed.update(row) unless row.nil?
    end
  ensure
    # Release the reader even when a row conversion raises.
    doc.close
  end

  parsed
end
mylistAtom(xml)
click to toggle source
マイリストのAtomフィードが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 231
#
# Parses the XML of a mylist Atom feed and returns it as a Hash.
#
# The document is read in two phases: everything before the first
# <entry> node fills +:mylist+, everything after it fills +:entry+.
# <title> and <id> occur both at feed level and inside each entry, which
# is why the phase decides where their values are stored.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Hash] <tt>{ :mylist => Hash, :entry => Array<Hash> }</tt>;
#   +:entry+ always holds at least one (possibly empty) Hash
def mylistAtom(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  # Index of the entry currently being filled. The first <entry> node is
  # consumed by the +break+ of phase 1, so slot 0 is pre-allocated.
  n = 0
  parsed = { :mylist => {}, :entry => [{}] }

  begin
    # Phase 1: feed-level metadata, up to the first <entry>.
    while doc.read
      break if doc.name === "entry"
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      row =
        case doc.name
        when "title"
          # Keep only the part of the title between the fixed prefix and
          # suffix; nil when the title does not have that form.
          title = parseRow(:title, :String, doc)[:title]
          found = /(マイリスト　)(.+)(‐ニコニコ動画)/.match(title)
          { :title => found && found[2] }
        when "id"       then parseRow(:mylist_id,   :mylistId, doc)
        when "subtitle" then parseRow(:description, :String,   doc)
        when "updated"  then parseRow(:updated,     :ISO8601,  doc)
        when "name"     then parseRow(:author,      :String,   doc)
        end

      parsed[:mylist].update(row) unless row.nil?
    end

    # Phase 2: the entries themselves.
    while doc.read
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      # Every further <entry> start tag opens a new slot.
      if doc.name === "entry"
        n += 1
        parsed[:entry][n] = {}
      end

      row =
        case doc.name
        when "title" then parseRow(:title,    :String,  doc)
        when "link"  then parseRow(:video_id, :videoId, doc)
        when "id"    then parseRow(:item_id,  :itemId,  doc)
        when "content"
          # The entry body is an HTML fragment; the fields are picked out
          # of it by class name.
          doc.read
          html = doc.value
          # Second capture group of +pattern+ in the fragment, or nil.
          grab = lambda do |pattern|
            found = pattern.match(html)
            found && found[2]
          end

          {
            :memo           => grab.call(/(<p\sclass=\"nico-memo\"\>)([^\<]{1,})/),
            # Accepts https as well as http thumbnail URLs.
            :thumbnail_url  => grab.call(/(<p\sclass=\"nico-thumbnail\">.+src=\")(https?:\/\/[^\"]{1,})/),
            :description    => grab.call(/(<p\sclass=\"nico-description\"\>)([^\<]{1,})/),
            :length         => Nicos::Converter.toSeconds(
              grab.call(/(<strong\sclass\=\"nico-info-length\"\>)([^\<]{1,})/)
            ),
            :first_retrieve => Nicos::Converter.japToUnix(
              grab.call(/(<strong\sclass\=\"nico-info-date\"\>)([^\<]{1,})/)
            ),
            :view           => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-view\"\>)([^\<]{1,})/)
            ),
            :res            => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-res\"\>)([^\<]{1,})/)
            ),
            :mylist         => Nicos::Converter.commaRemover(
              grab.call(/(<strong\sclass\=\"nico-numbers-mylist\"\>)([^\<]{1,})/)
            )
          }
        end

      parsed[:entry][n].update(row) unless row.nil?
    end
  ensure
    # Release the reader even when a row conversion raises.
    doc.close
  end

  parsed
end
parseRow(symbol, type, doc)
click to toggle source
# File lib/classes/parser.rb, line 13
#
# Converts the content of the node +doc+ is positioned on, according to
# +type+, and returns it as a one-entry Hash.
#
# For the text-based types the reader is advanced once and the value of
# the node it lands on is converted. A self-closing element (<foo/>) has
# no content node of its own: advancing would land on the next sibling,
# which the caller's own +read+ would then skip, so such elements are
# treated as having a nil value and the reader is left where it is.
#
# @param symbol [Symbol] key of the returned Hash; ignored for +:Tags+,
#   where the key is derived from the "domain" attribute
#   (+:tags_jp+, +:tags_tw+, +:tags_de+, +:tags_es+, or nil for any
#   other domain)
# @param type [Symbol] one of +:Fixnum+, +:String+, +:ISO8601+,
#   +:JapDate+, +:Time+, +:mylistId+, +:itemId+, +:videoId+, +:Tags+;
#   any other value yields a nil value without touching the reader
# @param doc [XML::Reader] reader positioned on the element to convert
# @return [Hash] <tt>{ key => converted value }</tt>
def parseRow(symbol, type, doc)
  # respond_to? keeps reader-like objects without empty_element? working.
  empty = doc.respond_to?(:empty_element?) && doc.empty_element?

  # Advances to the content node and returns its raw value (nil for a
  # self-closing element).
  text = lambda do
    if empty
      nil
    else
      doc.read
      doc.value
    end
  end

  value =
    case type
    # common
    when :Fixnum   then text.call.to_i
    when :String   then text.call
    when :ISO8601  then Nicos::Converter.iso8601ToUnix(text.call)
    when :JapDate  then Nicos::Converter.japToUnix(text.call)
    when :Time     then Nicos::Converter.toSeconds(text.call)
    # for Mylist Atom
    when :mylistId then Nicos::Extractor.mylistId(text.call)
    when :itemId   then Nicos::Extractor.itemId(text.call)
    when :videoId
      doc.move_to_attribute("href")
      Nicos::Extractor.videoId(doc.value)
    # for getThumbInfo
    when :Tags
      doc.move_to_attribute("domain")
      symbol =
        case doc.value
        when "jp" then :tags_jp
        when "tw" then :tags_tw
        when "de" then :tags_de
        when "es" then :tags_es
        end

      tags = []
      category = false
      locked = false
      prev = nil

      # A self-closing <tags/> has no children to walk.
      until empty || !doc.read
        # NOTE(review): this tests END_ENTITY, not END_ELEMENT. End
        # elements must reach the state machine below (it looks for node
        # type 15), so the check is kept exactly as it was.
        next if doc.node_type == XML::Reader::TYPE_END_ENTITY

        # Detect the end of the tag list. There should be a better, less
        # environment-dependent way to do this.
        break if doc.name == "tags"

        # A finished tag resets the flags for the next one.
        if prev == :end
          category = false
          locked = false
        end

        # If the attribute exists the reader moves onto it, which the
        # node name then reveals.
        doc.move_to_attribute("category")
        category = true if doc.name == "category"
        doc.move_to_attribute("lock")
        locked = true if doc.name == "lock"

        # Tell apart the start, the value and the end of a node.
        # For example <tag> and <tag lock="1"/> are both reported as '2',
        # which makes start and end hard to distinguish.
        # http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
        # (1 = element, 2 = attribute, 3 = text, 15 = end element)
        nt = doc.node_type
        now =
          if (nt == 2 || nt == 1) && prev != :val then :start
          elsif (nt == 2 || nt == 15) && prev == :val then :end
          elsif nt == 3 then :val
          end

        val = doc.read_outer_xml

        if now == :val
          obj = { :name => val }
          obj[:locked] = true if locked == true
          obj[:category] = true if category == true
          tags.push(obj)
        end

        prev = now
      end

      tags
    end

  { symbol => value }
end
tagAtom(xml)
click to toggle source
タグ検索のAtomフィードが返すXMLを解析し、ハッシュオブジェクトにして返します。
@return [HashObj]
# File lib/classes/parser.rb, line 167
#
# Parses the XML of a tag-search Atom feed and returns one Hash per
# <entry>.
#
# Keys are mixed for historical reasons and are kept that way for
# callers: +:title+, +:video_id+, +:thumbnail_url+, +:description+,
# +:length+ and +:first_retrieve+ are Symbols, while "published",
# "updated", "view", "res" and "mylist" are Strings.
#
# @param xml [String] XML text, passed to XML::Reader.string
# @return [Array<Hash>] one Hash per entry; when the feed has no entry
#   the Array holds a single Hash with whatever feed-level values matched
def tagAtom(xml)
  doc = XML::Reader.string(
    xml,
    :options => XML::Parser::Options::NOBLANKS | XML::Parser::Options::NOENT
  )
  # Until the first <entry> is seen n is -1, so feed-level <title>,
  # <link>, ... land in parsed[-1], i.e. the placeholder Hash, which is
  # replaced as soon as the first entry starts.
  n = -1
  parsed = [{}]

  begin
    while doc.read
      next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

      case doc.name
      when "entry"
        n += 1
        parsed[n] = {}
      when "title"
        doc.read
        parsed[n][:title] = doc.value
      when "link"
        doc.move_to_attribute("href")
        # Fifth "/"-separated piece of the href.
        parsed[n][:video_id] = doc.value.split('/')[4]
      when "published", "updated"
        label = doc.name
        doc.read
        parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
      when "p"
        doc.move_to_attribute("class")
        case doc.value
        when "nico-thumbnail"
          # The URL is the src attribute of the node following <p>.
          doc.read
          doc.move_to_attribute("src")
          parsed[n][:thumbnail_url] = doc.value
        when "nico-description"
          doc.read
          parsed[n][:description] = doc.value
        end
      when "strong"
        doc.move_to_attribute("class")
        case doc.value
        when "nico-info-length"
          doc.read
          parsed[n][:length] = Nicos::Converter.toSeconds(doc.value)
        when "nico-info-date"
          doc.read
          parsed[n][:first_retrieve] = Nicos::Converter.japToUnix(doc.value)
        when "nico-numbers-view", "nico-numbers-res", "nico-numbers-mylist"
          # Dropping the 13-character "nico-numbers-" prefix leaves
          # "view", "res" or "mylist" as the key.
          label = doc.value
          doc.read
          parsed[n][label.slice(13, 99)] = Nicos::Converter.commaRemover(doc.value)
        end
      end
    end
  ensure
    # Release the reader even when a conversion raises.
    doc.close
  end

  parsed
end