class EzproxyProviders::ScienceDirect

Public Class Methods

parse(path, params)
# File lib/logstash/filters/ezproxy_providers/sciencedirect.rb, line 4
# Derives resource metadata from a ScienceDirect request.
#
# Legacy URLs are recognised through the "_ob" query parameter; every other
# URL is classified by matching +path+ against a series of patterns. A missing
# or empty query parameter never raises: the fields that depend on it are left
# nil or unset.
#
# @param path [String] request path, e.g. "/science/article/pii/S0140673612601234"
# @param params [Hash] query parameters keyed by name; each value is read with
#   [0], so presumably an Array of Strings (CGI.parse style) - confirm with caller
# @return [Hash] any of the keys 'rtype', 'mime', 'unit_id', 'title_id',
#   'print_identifier' and 'pii'; empty when the request is not recognised
def self.parse(path, params)
  data = {}

  # First value of a query parameter, or nil when the parameter is absent.
  first = ->(key) { (params[key] || [])[0] }

  # Formats eight characters starting at +offset+ as "XXXX-XXXX". Interpolation
  # renders an out-of-range slice (nil) as "", so short input cannot raise.
  dashed = ->(text, offset) { "#{text[offset, 4]}-#{text[offset + 4, 4]}" }

  if params.key?('_ob')
    data['title_id'] = first.call('_cdi') if params['_cdi']

    case first.call('_ob')
    when 'PdfDownloadURL'
      hub_eid = first.call('_hubEid')

      data['mime']    = 'PDF'
      data['rtype']   = 'ARTICLES_BUNDLE'
      data['unit_id'] = hub_eid
      # The PII is the third dash-separated field of the hub EID.
      data['pii']     = (hub_eid || '').split('-')[2]

      if params.key?('_isbn') || params.key?('isBook')
        data['rtype'] = 'CHAPTERS_BUNDLE'

        # "isBook" may be sent without "_isbn"; the identifiers are replaced
        # only when an ISBN value is actually available.
        isbn = first.call('_isbn')
        if isbn
          data['print_identifier'] = isbn
          data['title_id']         = isbn
          data['unit_id']          = isbn
        end
      end

      # A PII, when present, takes precedence for title_id/print_identifier.
      if data['pii']
        data['title_id']         = data['pii'][1, 8]
        data['print_identifier'] = dashed.call(data['pii'], 1)
      end

    when 'IssueURL'
      data['title_id'] = (first.call('_tockey') || '').split('#')[2]
      data['rtype']    = 'TOC'
      data['mime']     = 'MISC'

    when 'ArticleURL'
      case first.call('_fmt')
      when 'summary'
        data['rtype'] = 'ABS'
        data['mime']  = 'MISC'
      when 'full'
        data['rtype'] = 'ARTICLE'
        data['mime']  = 'HTML'
      end

    when 'MImg'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'PDF'

    when 'MiamiImageURL'
      pii = first.call('_pii')

      if pii
        data['pii']     = pii
        data['unit_id'] = pii

        if pii[0] == 'B'
          # PIIs starting with "B" are treated as book sections; the next 13
          # characters are used as the identifier.
          data['print_identifier'] = pii[1, 13]
          data['title_id']         = data['print_identifier']
          data['rtype']            = 'BOOK_SECTION'
        else
          data['print_identifier'] = dashed.call(pii, 1)
          data['title_id']         = pii[1, 8]
          data['rtype']            = 'ARTICLE'
        end
        data['mime'] = 'PDF'
      end

    when 'PdfExcerptURL'
      data['rtype'] = 'PREVIEW'
      data['mime']  = 'PDF'

      image_key = first.call('_imagekey')
      pii_key   = first.call('_piikey')

      if image_key && pii_key
        data['pii'] = pii_key
        # NOTE(review): the "." before "pdf" is unescaped and matches any
        # character; kept as-is to preserve existing matching behaviour.
        if (match = /.?-[^-]+-([0-9]{4})([0-9]{3}[0-9Xx])([0-9A-Za-z]*)-main.pdf$/.match(image_key))
          data['unit_id']          = pii_key
          data['title_id']         = match[1] + match[2]
          data['print_identifier'] = match[1] + '-' + match[2]
        end
      end
    end

  elsif (match = /^\/science\/article\/pii\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)(\/pdf(?:ft)?)?$/.match(path))
    # Article or book-section page; a trailing /pdf or /pdfft selects the PDF.
    data['pii']     = match[1]
    data['unit_id'] = match[1]
    data['mime']    = match[4] ? 'PDF' : 'HTML'

    if match[2] == 'B'
      data['rtype']            = 'BOOK_SECTION'
      data['title_id']         = match[3]
      data['print_identifier'] = match[3]
    else
      data['rtype']            = 'ARTICLE'
      data['title_id']         = match[3][0, 8]
      data['print_identifier'] = dashed.call(match[3], 0)
    end

  elsif (match = /^\/science\/(journal|bookseries|handbooks|book)\/([0-9Xx]{8,})(\/[0-9]+)?(\/[0-9]+)?$/.match(path))
    # Table of contents of a journal, book series, handbook or book.
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'
    data['title_id'] = match[2]
    # The first optional numeric path segment is appended to the unit id.
    data['unit_id']  = match[3] ? match[2] + match[3] : match[2]

    case match[1]
    when 'journal', 'handbooks', 'bookseries'
      data['print_identifier'] = dashed.call(match[2], 0)
    when 'book'
      data['print_identifier'] = match[2]
    end

  elsif (match = /^\/science\/MiamiMultiMediaURL\/[^\/]+(S([0-9]{4})([0-9]{3}[0-9Xx])[a-zA-Z0-9]*).*\.pdf$/.match(path))
    data['pii']              = match[1]
    data['unit_id']          = match[1]
    data['title_id']         = match[2] + match[3]
    data['print_identifier'] = match[2] + '-' + match[3]
    data['rtype']            = 'ARTICLE'
    data['mime']             = 'PDF'

  elsif (match = /^\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)\/[0-9A-Za-z\-.]*-main\.pdf$/.match(path))
    # Direct "<pii>/...-main.pdf" download path.
    data['pii']     = match[1]
    data['unit_id'] = match[1]
    data['mime']    = 'PDF'

    if match[2] == 'B'
      data['rtype']            = 'BOOK_SECTION'
      data['title_id']         = match[3]
      data['print_identifier'] = match[3]
    else
      data['rtype']            = 'ARTICLE'
      data['title_id']         = match[3][0, 8]
      data['print_identifier'] = dashed.call(match[3], 0)
    end

  elsif path == '/science/publication'
    data['rtype'] = 'TOC'
    data['mime']  = 'MISC'

    issn = first.call('issn')
    if issn
      data['print_identifier'] = dashed.call(issn, 0)
      data['unit_id']          = data['print_identifier']
      data['title_id']         = issn
    end
  end

  data
end