module Gbbib::GbScrapper

National standard scrapper.

Public Class Methods

get_committee(doc) click to toggle source

@param doc [Nokogiri::HTML] @return [Hash]

* :type [String]
* :name [String]
# File lib/gbbib/gb_scrapper.rb, line 51
# Extracts the technical committee name from a standard's detail page.
#
# The name is taken from the text nodes that follow the first <a> child of
# a <p> element: everything between an opening parenthesis and the next
# closing parenthesis.
#
# @param doc [Nokogiri::HTML] parsed detail page (anything responding to
#   #xpath whose result responds to #text)
# @return [Hash]
#   * :type [String] always 'technical'
#   * :name [String] committee name, or '' when no parenthesised text exists
def get_committee(doc)
  sibling_text = doc.xpath('//p/a[1]/following-sibling::text()').text
  # The parentheses sit inside character classes so they are literals rather
  # than unbalanced group delimiters. Both ASCII and full-width (U+FF08 /
  # U+FF09) forms are accepted -- presumably the page uses the full-width
  # ones; TODO confirm against live markup.
  name = sibling_text.match(/(?<=[(\u{FF08}])[^)\u{FF09}]+/).to_s
  { type: 'technical', name: name }
end
scrape_doc(pid) click to toggle source

@param pid [String] standard's page id @return [Gbbib::GbBibliographicItem]

# File lib/gbbib/gb_scrapper.rb, line 37
# Fetches a standard's detail page and builds a bibliographic item from it.
#
# Any StandardError raised while fetching or parsing is reported on stderr
# and swallowed (best-effort behaviour); in that case the method returns nil,
# which is the return value of Kernel#warn.
#
# @param pid [String] standard's page id, appended to the detail URL as-is
# @return [Gbbib::GbBibliographicItem, nil] the item, or nil on failure
def scrape_doc(pid)
  src = "http://www.std.gov.cn/gb/search/gbDetailed?id=#{pid}"
  doc = Nokogiri::HTML OpenURI.open_uri(src)
  GbBibliographicItem.new scrapped_data(doc, src: src)
rescue StandardError
  # Report the URL that was actually requested, not the search endpoint.
  warn "Cannot access #{src}"
end
scrape_page(text) click to toggle source

@param text [String] code of standard for search @return [Gbbib::HitCollection]

# File lib/gbbib/gb_scrapper.rb, line 20
# Runs a search on std.gov.cn and wraps the result links in a HitCollection.
#
# Every element matching the CSS selector '.s-title a' becomes a Hit built
# from the element's pid attribute and its text, with this scrapper attached.
#
# Any StandardError raised while fetching or parsing is reported on stderr
# and swallowed (best-effort behaviour); in that case the method returns nil,
# which is the return value of Kernel#warn.
#
# @param text [String] code of standard to search for; expected to be raw,
#   unescaped text (NOTE(review): confirm no caller pre-escapes it)
# @return [Gbbib::HitCollection, nil] the hits, or nil on failure
def scrape_page(text)
  # Standard codes may contain spaces and slashes (e.g. "GB/T 20223"), which
  # are not valid inside a URI, so the query value is form-encoded first.
  query = URI.encode_www_form_component(text)
  search_html = OpenURI.open_uri(
    "http://www.std.gov.cn/search/stdPage?q=#{query}"
  )
  result = Nokogiri::HTML search_html
  hits = result.css('.s-title a').map do |h|
    Hit.new pid: h[:pid], title: h.text, scrapper: self
  end
  HitCollection.new hits
rescue StandardError
  warn 'Cannot access http://www.std.gov.cn/search/stdPage'
end