class DJNML

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Attributes

accession_number[R]
brand[R]
company_address[R]
company_city[R]
company_code[R]
company_name[R]
company_zip[R]
content_code[R]
delete[R]
destination[R]
display_date[R]
dist_id[R]
doc_date[R]
function_code[R]
geo_code[R]
government_code[R]
headline[R]
headline_brand[R]
hot[R]
html[R]
industry_code[R]
isin_code[R]
journal_code[R]
lang[R]
language[R]
market_code[R]
md5[R]
modifications[R]
msize[R]
news_source[R]
origin[R]
original_source[R]
page_citation[R]
page_code[R]
product[R]
product_code[R]
publisher[R]
retention[R]
routing_code[R]
seq[R]
service_id[R]
stat_code[R]
subject_code[R]
sys_id[R]
temp_perm[R]
text[R]
transmission_date[R]
urgency[R]
website[R]

Public Class Methods

load(filename) click to toggle source
# File lib/djnml.rb, line 106
# Builds a new instance and populates it from the file at +filename+.
#
# Returns nil when +filename+ is nil or false; otherwise returns whatever
# the instance-level #load returns for that file.
#
# Raises FileError when +filename+ does not exist.
def self.load(filename)
  return unless filename

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  raise FileError, "#{filename}: no such file!" unless File.exist?(filename)

  new.load(filename)
end
new(data = {}) click to toggle source
# File lib/djnml.rb, line 54
# Populates the attributes from +data+, a Hash keyed by attribute name
# (string keys such as 'msize', 'doc_date', 'journal_code').
#
# Missing keys leave the corresponding attribute nil, except:
# * 'msize' and 'seq' go through to_i (so nil becomes 0);
# * the *_code lists from 'industry_code' through 'function_code' become an
#   Array of Codes objects, empty when the key is absent.
#
# Date values are parsed with Time.parse; an absent date stays nil instead
# of raising, so that DJNML.new can be called without arguments.
def initialize(data = {})
  # Time.parse(nil) raises TypeError, so only parse values that are present.
  parse_time = ->(value) { value.nil? ? nil : Time.parse(value) }
  # Wraps every entry stored under +key+ in a Codes object.
  codes_for = ->(key) { data[key].to_a.map { |c| Codes.new(c) } }

  @msize             = data['msize'].to_i
  @md5               = data['md5']
  @sys_id            = data['sys_id']
  @destination       = data['destination']
  @dist_id           = data['dist_id']
  @transmission_date = parse_time.call(data['transmission_date'])
  @publisher         = data['publisher']
  @doc_date          = parse_time.call(data['doc_date'])
  @product           = data['product']
  @seq               = data['seq'].to_i
  @lang              = data['lang']
  @news_source       = data['news_source']
  @origin            = data['origin']
  @service_id        = data['service_id']
  @urgency           = data['urgency']
  @brand             = data['brand']
  @temp_perm         = data['temp_perm']
  @retention         = data['retention']
  @hot               = data['hot']
  @original_source   = data['original_source']
  @accession_number  = data['accession_number']
  @page_citation     = data['page_citation']
  @display_date      = parse_time.call(data['display_date'])
  @company_code      = data['company_code']
  @isin_code         = data['isin_code']
  @page_code         = data['page_code']
  @industry_code     = codes_for.call('industry_code')
  @government_code   = codes_for.call('government_code')
  @subject_code      = codes_for.call('subject_code')
  @market_code       = codes_for.call('market_code')
  @product_code      = codes_for.call('product_code')
  @geo_code          = codes_for.call('geo_code')
  @stat_code         = codes_for.call('stat_code')
  # Previously read from 'stat_code' by mistake.
  @journal_code      = codes_for.call('journal_code')
  @routing_code      = codes_for.call('routing_code')
  @content_code      = codes_for.call('content_code')
  @function_code     = codes_for.call('function_code')
  @headline          = data['headline']
  @headline_brand    = data['headline_brand']
  @html              = data['html']
  @text              = data['text']
  @copyright_year    = data['copyright_year']
  @copyright_holder  = data['copyright_holder']
  @website           = data['website']
  @company_name      = data['company_name']
  @company_address   = data['company_address']
  @company_zip       = data['company_zip']
  @company_city      = data['company_city']
  @language          = data['language']
end

Public Instance Methods

has_content?() click to toggle source
# File lib/djnml.rb, line 458
# True when the story text is anything other than nil (an empty string
# still counts as content).
def has_content?
  !text.nil?
end
load(filename) click to toggle source
# File lib/djnml.rb, line 118
# Parses the DJNML XML file at +filename+ and fills in this object's
# attributes from it.
#
# Most sections of the document are read on a best-effort basis: an error
# while reading one section (for example a missing tag or an unparsable
# date) is swallowed and the remaining sections are still processed. The
# attributes of a failed section keep whatever value they had when the
# error occurred. The company-details match and the modifications section
# are not guarded, so errors there propagate to the caller.
#
# Raises FileError when +filename+ does not exist. Returns +self+.
def load(filename)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(filename)
    raise FileError.new("#{filename}: no such file!")
  end

  # File.open with a block closes the handle once parsing is done and,
  # unlike Kernel#open, never treats a name starting with "|" as a command.
  parser = File.open(filename) { |io| Nokogiri::XML(io) }

  newswires_path = '/doc/djnml/head/docdata/djn/djn-newswires'
  coding_path    = "#{newswires_path}/djn-mdata/djn-coding"

  # doc tag
  #
  begin
    doc_tag            = parser.search('/doc').first
    @msize             = doc_tag['msize'].to_i
    @md5               = doc_tag['md5']
    @sys_id            = doc_tag['sysId']
    @destination       = doc_tag['destination']
    @dist_id           = doc_tag['distId']
    @transmission_date = Time.parse(doc_tag['transmission-date'])
  rescue
    # ignore errors
  end

  # djnml tag
  #
  begin
    djnml_tag  = parser.search('/doc/djnml').first
    @publisher = djnml_tag['publisher']
    @doc_date  = Time.parse(djnml_tag['docdate'])
    @product   = djnml_tag['product']
    @seq       = djnml_tag['seq'].to_i
    @lang      = djnml_tag['lang']
  rescue
    # ignore errors
  end

  # djn-newswires tag
  #
  begin
    newswires_tag = parser.search(newswires_path).first
    @news_source  = newswires_tag['news-source']
    @origin       = newswires_tag['origin']
    @service_id   = newswires_tag['service-id']
  rescue
    # ignore errors
  end

  # djn-urgency tag
  #
  begin
    urgency_tag = parser.search("#{newswires_path}/djn-urgency").first
    # No String#squeeze here: it collapses repeated digits ("11" -> "1").
    @urgency = urgency_tag.text.strip.to_i
  rescue
    # ignore errors
  end

  # djn-mdata
  #
  begin
    mdata_tag         = parser.search("#{newswires_path}/djn-mdata").first
    @brand            = mdata_tag['brand']
    @temp_perm        = mdata_tag['temp-perm']
    @retention        = mdata_tag['retention']
    @hot              = mdata_tag['hot']
    @original_source  = mdata_tag['original-source']
    @accession_number = mdata_tag['accession-number']
    @page_citation    = mdata_tag['page-citation']
    @display_date     = Time.parse(mdata_tag['display-date'])
  rescue
    # ignore errors
  end

  # coding / company, isin, page: kept as plain stripped strings
  #
  %w[company isin page].each do |kind|
    begin
      values = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| tag.text.strip }
      instance_variable_set("@#{kind}_code", values)
    rescue
      # ignore errors
    end
  end

  # coding / everything else: each <c> entry is wrapped in a Codes object
  #
  %w[industry government subject market product geo stat journal routing content function].each do |kind|
    begin
      values = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| Codes.new(tag.text.strip) }
      instance_variable_set("@#{kind}_code", values)
    rescue
      # ignore errors
    end
  end

  # body / headline
  #
  begin
    headline_tag = parser.search('/doc/djnml/body/headline').first
    @headline    = headline_tag.text.strip
    @headline_brand = headline_tag['brand-display'] if headline_tag['brand-display']
  rescue
    # ignore errors
  end

  # body / text
  #
  begin
    text_tag = parser.search('/doc/djnml/body/text').first
    @html    = text_tag.children.to_xml
    @text    = text_tag.children.text.strip
  rescue
    # ignore errors
  end

  # copyright
  #
  begin
    copyright_tag     = parser.search('/doc/djnml/head/copyright').first
    @copyright_year   = copyright_tag['year'].to_s.strip.to_i
    @copyright_holder = copyright_tag['holder']
  rescue
    # ignore errors
  end

  # website: taken from an "Internet: ..." line in the story text
  #
  begin
    if @text =~ /Internet:\s+(.+?)$/
      @website = Regexp.last_match(1).strip
    end
  rescue
    # ignore errors
  end

  # company details: a "Company:" line followed by an address line and a
  # line made of digits followed by further text (stored as zip and city)
  #
  if @text =~ /Company:\s+(\S.+?)\s*\n+\s+(\b.+?)\n+\s+(\d+)\s+(\b.+?)\n+/
    match            = Regexp.last_match
    @company_name    = match[1].strip
    @company_address = match[2].strip
    @company_zip     = match[3].strip
    @company_city    = match[4].strip
  end

  # language
  #
  begin
    @language = LanguageDetector.instance.detect(@text)
  rescue
    # ignore errors
  end

  # stories to delete
  #
  begin
    @delete = []
    parser.search('/doc/djnml/administration/doc-delete').each do |dd|
      @delete << Delete.new(:product => dd['product'],
                            :doc_date => dd['docdate'],
                            :seq => dd['seq'],
                            :publisher => dd['publisher'],
                            :reason => dd['reason'])
    end
  rescue
    # ignore errors
  end

  # replacements: every modify-replace child shares the attributes of the
  # first doc-modify tag
  #
  @modifications = []
  doc_modify = parser.search('/doc/djnml/administration/doc-modify').first

  parser.search('/doc/djnml/administration/doc-modify/modify-replace').each do |m|
    @modifications << Modification.new(:doc_date => doc_modify['docdate'],
                                       :product => doc_modify['product'],
                                       :publisher => doc_modify['publisher'],
                                       :seq => doc_modify['seq'],
                                       :xml => m)
  end

  self
end