class Nlg::Extractors::Base

Attributes

nlg_id[R]
page[R]
url[R]

Public Class Methods

decode_text(encoded_text) click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 70
# Decodes escaped HTML entities in +encoded_text+ and returns the result.
#
# A new HTMLEntities coder is created on every call.
#
# @param encoded_text the text containing encoded entities
# @return whatever HTMLEntities#decode returns for that text
def self.decode_text(encoded_text)
  HTMLEntities.new.decode(encoded_text)
end
new(id=nil) click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 18
# Creates an extractor and hands +id+ straight to #load_page.
#
# @param id record identifier to load; defaults to nil
def initialize(id = nil)
  load_page(id)
end

Public Instance Methods

decode_text(encoded_text) click to toggle source

Decodes text containing escaped HTML entities and returns the decoded text.

Params:

encoded_text

The text that contains the encoded HTML entities.

# File lib/bookshark/extractors/nlg/base.rb, line 66
# Instance-level convenience wrapper: forwards +encoded_text+ to the
# class-level decode_text of the receiver's own class and returns its result.
#
# @param encoded_text the text containing encoded entities
# @return the value returned by the class-level decode_text
def decode_text(encoded_text)
  self.class.decode_text(encoded_text)
end
load_page(id=nil) click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 22
# Loads the page for +id+ via #load_page_by_id, or does nothing when no id
# is given.
#
# @param id record identifier; nil (the default) skips loading
# @return nil when +id+ is nil, otherwise the result of #load_page_by_id
def load_page(id = nil)
  return if id.nil?

  load_page_by_id(id)
end
load_page_by_id(id) click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 26
# Downloads the MARCXML export of a record from nbib.nlg.gr and stores the
# response body in @page.
#
# Sets @nlg_id (when +id+ is non-nil) and @url before the request. @page is
# reset to nil up front so that a failed load never leaves the body of a
# previously loaded record paired with the new id/url.
#
# Failures are logged with pp and swallowed:
# * Errno::ENOENT and OpenURI::HTTPError are logged; @page stays nil.
# * EmptyPageError (raised here when the response is not a 200 "text/xml"
#   reply) is logged; @page stays nil.
# * Any other StandardError triggers a 2-minute sleep and a retry, at most
#   +max_retries+ times. Previously this retried forever, so a persistent
#   failure (or a programming error) would hang the caller indefinitely.
#   Once the budget is spent the error is logged and the method gives up
#   with @page left nil.
#
# NOTE(review): Net::HTTP does not itself raise OpenURI::HTTPError; that
# branch is kept as-is for compatibility — confirm whether it is still needed.
#
# @param id record identifier interpolated into the export URL; when nil the
#   previously stored @nlg_id is reused
# @param max_retries [Integer] maximum number of retries after a generic error
# @return the response body on success; otherwise the value of the last
#   expression of the rescue branch that handled the failure
def load_page_by_id(id, max_retries: 5)
  @nlg_id = id unless id.nil? # id is expected to be the last number.

  host = "nbib.nlg.gr"
  path = "/Record/#{@nlg_id}/Export?style=MARCXML"
  @url = "http://#{host}#{path}"
  @page = nil
  retries = 0

  begin
    pp "Downloading page: #{@url}"

    Net::HTTP.start(host) do |http|
      response = http.get(path)
      pp response.content_type
      pp response.code
      raise EmptyPageError.new(@url) unless response.content_type == "text/xml" && response.code == "200"

      @page = response.body
    end
  rescue Errno::ENOENT => e
    pp "Page: #{@url} NOT FOUND."
    pp e
  rescue EmptyPageError => e
    pp "Page: #{@url} is EMPTY."
    pp e
    @page = nil
  rescue OpenURI::HTTPError => e
    pp e
    pp e.io.status
  rescue StandardError => e
    if retries < max_retries
      retries += 1
      pp "Generic error #{e.class}. Will wait for 2 minutes and then try again."
      pp e
      sleep(120)
      retry
    end

    # Retry budget exhausted: report and give up instead of looping forever.
    pp "Generic error #{e.class}. Giving up after #{retries} retries."
    pp e
    @page = nil
  end
end
present?(value) click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 76
# Tells whether +value+ holds something: true only when it is neither nil
# nor empty.
#
# The nil check short-circuits, so +empty?+ is never sent to nil. Any other
# value must respond to +empty?+ (e.g. String, Array, Hash).
#
# @param value the object to inspect
# @return [Boolean] true when +value+ is non-nil and non-empty
def present?(value)
  !(value.nil? || value.empty?)
end