module Gbbib::SecScrapper

Sector standard scraper

Public Class Methods

scrape_doc(pid) click to toggle source

@param pid [String] standard's page id @return [Gbbib::GbBibliographicItem]

# File lib/gbbib/sec_scrapper.rb, line 35
# Fetches a standard's detail page from std.gov.cn and builds a
# bibliographic item from the data scraped out of it.
#
# The page id is form-encoded before it is placed in the query string, so an
# id containing spaces or other characters that are not valid in a URI no
# longer raises URI::InvalidURIError while the address is being built.
#
# @param pid [String] standard's page id
# @return [Gbbib::GbBibliographicItem, nil] the scraped item, or nil (after a
#   warning on stderr) when fetching or processing the page fails
def scrape_doc(pid)
  src = "http://www.std.gov.cn/hb/search/stdHBDetailed?id=#{URI.encode_www_form_component(pid)}"
  page_uri = URI(src)
  begin
    doc = Nokogiri::HTML(Net::HTTP.get(page_uri))
    GbBibliographicItem.new(scrapped_data(doc, src: src))
  rescue StandardError
    # Best effort: every failure in the block is reported the same way.
    # Kernel#warn returns nil, so nil is what the caller receives.
    warn "Cannot access #{src}"
  end
end
scrape_page(text) click to toggle source

@param text [String] code of standard for search @return [Gbbib::HitCollection]

# File lib/gbbib/sec_scrapper.rb, line 20
# Queries the std.gov.cn search endpoint for a standard code and wraps each
# returned row in a Hit.
#
# The search text is form-encoded before it is placed in the query string, so
# codes containing spaces, slashes or non-ASCII characters (for example
# "JB/T 13368") produce a valid URI instead of raising URI::InvalidURIError.
#
# @param text [String] code of standard for search
# @return [Gbbib::HitCollection, nil] the collected hits, or nil (after a
#   warning on stderr) when the request or the response handling fails
def scrape_page(text)
  uri = URI("http://www.std.gov.cn/hb/search/hbPage?searchText=#{URI.encode_www_form_component(text)}")
  begin
    res = JSON.parse(Net::HTTP.get(uri))
    hits = res['rows'].map do |row|
      Hit.new pid: row['id'], title: row['STD_CODE'], scrapper: self
    end
    HitCollection.new hits
  rescue StandardError
    # Best effort: every failure in the block is reported the same way.
    # Kernel#warn returns nil, so nil is what the caller receives.
    warn "Cannot access #{uri}"
  end
end

Private Class Methods

get_committee(doc) click to toggle source

@param doc [Nokogiri::HTML::Document] @return [Hash]

* :type [String]
* :name [String]
# File lib/gbbib/sec_scrapper.rb, line 52
# Builds the committee hash for a document: the type is always 'technical'
# and the name is the 'administration' entry of the prefix data looked up
# for the document's reference.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Hash]
#   * :type [String]
#   * :name [String]
def get_committee(doc)
  prefix = get_prefix(get_ref(doc))
  { type: 'technical', name: prefix['administration'] }
end