class Nlg::Extractors::Base
Attributes
nlg_id[R]
page[R]
url[R]
Public Class Methods
decode_text(encoded_text)
click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 70
# Decodes HTML entities found in +encoded_text+.
#
# A fresh HTMLEntities coder is created on every call and the text is
# passed to its +decode+ method; whatever that returns is the result.
#
# @param encoded_text the text to decode (handed to HTMLEntities#decode as-is)
# @return the value produced by HTMLEntities#decode
def self.decode_text(encoded_text)
  # encoded_text = File.read(encoded_file_path)
  HTMLEntities.new.decode(encoded_text)
end
new(id=nil)
click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 18
# Creates the extractor and immediately delegates to +load_page+ with
# the given id (which may be nil).
#
# @param id the record id to load, or nil
def initialize(id = nil)
  load_page(id)
end
Public Instance Methods
decode_text(encoded_text)
click to toggle source
load_page(id=nil)
click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 22
# Loads the page for +id+ via +load_page_by_id+; does nothing (and
# returns nil) when no id is given.
#
# @param id the record id to load, or nil to skip loading
# @return nil when +id+ is nil, otherwise whatever +load_page_by_id+ returns
def load_page(id = nil)
  return if id.nil?

  load_page_by_id(id)
end
load_page_by_id(id)
click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 26
# Downloads the MARCXML export of a record from nbib.nlg.gr and stores the
# response body in @page.
#
# Side effects: sets @nlg_id (when +id+ is not nil), @url and, on success,
# @page. Progress and errors are printed with +pp+; no exception raised
# while downloading is propagated to the caller.
#
# Failure handling:
# * Errno::ENOENT   - logged; @page is left untouched.
# * EmptyPageError  - raised internally when the response is not a
#                     "200" with content type "text/xml"; logged and
#                     @page is set to nil.
# * any other StandardError - the download is retried after +retry_wait+
#                     seconds, at most +max_retries+ times. Once the
#                     retries are used up the error is logged and @page
#                     is set to nil instead of looping forever.
#
# @param id the record id; expected to be the last number of the record URL
# @param max_retries [Integer] how many times a generic failure is retried
# @param retry_wait [Numeric] seconds to sleep before each retry
def load_page_by_id(id, max_retries: 5, retry_wait: 120)
  attempts = 0

  @nlg_id = id unless id.nil? # id is expected to be the last number.
  @url = "http://nbib.nlg.gr/Record/#{@nlg_id}/Export?style=MARCXML"

  pp "Downloading page: #{@url}"

  Net::HTTP.start("nbib.nlg.gr") do |http|
    response = http.get("/Record/#{@nlg_id}/Export?style=MARCXML")

    pp response.content_type
    pp response.code

    # Anything other than a successful XML response is treated as an empty page.
    raise EmptyPageError.new(@url) unless response.content_type == "text/xml" && response.code == "200"

    @page = response.body
  end
rescue Errno::ENOENT => e
  pp "Page: #{@url} NOT FOUND."
  pp e
rescue EmptyPageError => e
  pp "Page: #{@url} is EMPTY."
  pp e
  @page = nil
rescue StandardError => e
  # NOTE: the former `rescue OpenURI::HTTPError` clause was dropped. Net::HTTP
  # never raises it, and when open-uri is not loaded merely evaluating that
  # constant raised NameError before this handler could run.
  attempts += 1
  if attempts > max_retries
    pp "Generic error #{e.class}. Giving up after #{max_retries} retries."
    pp e
    @page = nil
  else
    pp "Generic error #{e.class}. Will wait for #{retry_wait} seconds and then try again."
    pp e
    sleep(retry_wait)
    retry
  end
end
present?(value)
click to toggle source
# File lib/bookshark/extractors/nlg/base.rb, line 76
# Tells whether +value+ is neither nil nor empty.
#
# @param value an object that is nil or responds to +empty?+
#   (a non-nil object without +empty?+ raises NoMethodError)
# @return [Boolean] true when +value+ is not nil and +value.empty?+ is false
def present?(value)
  # `&&`/`!` instead of `and`/`not`: same result here, without the
  # low-precedence keyword operators; the comparison already yields a boolean.
  !value.nil? && !value.empty?
end