module Gbbib::SecScrapper
Sector standard scrapper
Public Class Methods
scrape_doc(pid)
click to toggle source
@param pid [String] standard's page id @return [Gbbib::GbBibliographicItem]
# File lib/gbbib/sec_scrapper.rb, line 35
# Downloads the detail page of a sector standard and turns it into a
# bibliographic item.
#
# @param pid [String] standard's page id
# @return [Gbbib::GbBibliographicItem, nil] nil is returned (after a warning
#   is written) when fetching or parsing raises a StandardError
def scrape_doc(pid)
  url = "http://www.std.gov.cn/hb/search/stdHBDetailed?id=#{pid}"
  uri = URI(url)
  begin
    html = Net::HTTP.get(uri)
    document = Nokogiri::HTML(html)
    GbBibliographicItem.new(scrapped_data(document, src: url))
  rescue StandardError
    # Best effort: report the failure and fall through with warn's nil result.
    warn "Cannot access #{url}"
  end
end
scrape_page(text)
click to toggle source
@param text [String] code of standard for search @return [Gbbib::HitCollection]
# File lib/gbbib/sec_scrapper.rb, line 20
# Searches the sector standards index and wraps every returned row in a Hit.
#
# @param text [String] code of standard to search for (raw, not URL-encoded)
# @return [Gbbib::HitCollection, nil] nil is returned (after a warning is
#   written) when the request or the JSON parsing raises a StandardError
def scrape_page(text)
  # Standard codes commonly contain spaces, slashes or non-ASCII characters
  # (e.g. "JB/T 13368-2018"); they must be percent-encoded or URI() raises
  # URI::InvalidURIError before the request is even attempted.
  query = URI.encode_www_form_component(text.to_s)
  uri = URI("http://www.std.gov.cn/hb/search/hbPage?searchText=#{query}")
  begin
    res = JSON.parse(Net::HTTP.get(uri))
    hits = res['rows'].map do |r|
      Hit.new pid: r['id'], title: r['STD_CODE'], scrapper: self
    end
    HitCollection.new hits
  rescue StandardError
    # Best effort: report the failure and fall through with warn's nil result.
    warn "Cannot access #{uri}"
  end
end
Private Class Methods
get_committee(doc)
click to toggle source
@param doc [Nokogiri::HTML::Document] @return [Hash]
* :type [String] * :name [String]
# File lib/gbbib/sec_scrapper.rb, line 52
# Builds the committee description for a scraped standard page.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Hash]
#   * :type [String] always 'technical'
#   * :name [String] the 'administration' entry of the prefix data looked up
#     for the document's reference
def get_committee(doc)
  prefix = get_prefix(get_ref(doc))
  { type: 'technical', name: prefix['administration'] }
end