class EzproxyProviders::Springer

Public Class Methods

parse(path, params)
# File lib/logstash/filters/ezproxy_providers/springer.rb, line 5
# Extracts resource metadata from a Springer URL path and its query parameters.
#
# The path is tested against a fixed sequence of patterns; the first one that
# matches decides which fields are filled in. When no pattern matches, only the
# provider name is returned.
#
# @param path [String] path component of the requested URL
# @param params [Hash] query parameters keyed by name; each value may be a
#   String or an Array of Strings (only the first element is consulted)
# @return [Hash] always contains "provider" => "springer"; depending on the
#   matched pattern it may also contain 'rtype', 'mime', 'unit_id',
#   'title_id', 'doi', 'print_identifier' and 'online_identifier'
def self.parse(path, params)
  data = { "provider" => "springer" }

  # First value of a query parameter, or nil when it is absent. Accepts both
  # Array-valued params (e.g. {'genre' => ['journal']}) and plain String values.
  param = ->(name) { Array(params[name]).first }

  if (match = /\/journal(\/volumesAndIssues)?\/([0-9]+)/.match(path))
    data['title_id'] = match[2]
    data['unit_id']  = match[2]
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'

  elsif (match = /^\/(article|book|protocol)\/([0-9]+\.[0-9]+\/[^\/]+)(\/page\/[0-9]+)?(\/fulltext.html)?/.match(path))
    doi_suffix = match[2].split('/')[1]

    data['doi']     = match[2]
    # An optional "/page/N" segment is kept as part of the unit id.
    data['unit_id'] = doi_suffix + (match[3] || '')

    case match[1]
    when 'article'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'HTML'
    when 'book'
      data['rtype'] = 'BOOK'
      data['mime']  = 'HTML'

      # A bare "/book/<prefix>/<digits-and-dashes>" path with nothing after it
      # is reported as a table of contents instead of book content.
      if /^\/book\/([0-9]+\.[0-9]+\/([0-9-])+)$/.match(path)
        data['rtype']             = 'TOC'
        data['mime']              = 'MISC'
        data['online_identifier'] = doi_suffix
      end
    when 'protocol'
      data['rtype'] = 'BOOK'
      data['mime']  = 'HTML'
    end

  elsif (match = /^\/content\/pdf\/(10\.[0-9]+\/(.+?))(\.pdf)?$/.match(path))
    data['doi']     = match[1]
    data['unit_id'] = match[2]
    data['rtype']   = 'ARTICLE'
    data['mime']    = 'PDF'

    # A suffix starting with 13 digits (dashes allowed in between) is
    # reported as a book rather than an article.
    data['rtype'] = 'BOOK' if /^(\d-*){13}(?![\d-])/.match(match[2])

  elsif (match = /^\/content\/([0-9]{4}-[0-9]{4})/.match(path))
    data['print_identifier'] = match[1]
    data['unit_id']          = match[1]
    data['rtype']            = 'TOC'
    data['mime']             = 'MISC'

  elsif (match = /^\/content\/([a-zA-Z0-9]+)(\/fulltext.pdf)?/.match(path))
    data['unit_id'] = match[1]
    data['rtype']   = 'ABS'
    data['mime']    = 'MISC'

  elsif (match = /^\/chapter\/(([0-9]+\.[0-9]+)\/([^\/]*))(\/([a-z]+)\.html)?/.match(path))
    data['doi']     = match[1]
    data['unit_id'] = match[3]
    data['rtype']   = 'BOOK_SECTION'
    data['mime']    = 'HTML'

  elsif (match = /^\/(book)?series\/([0-9]+)/.match(path))
    data['title_id'] = match[2]
    data['unit_id']  = match[2]
    data['rtype']    = 'BOOKSERIE'
    data['mime']     = 'MISC'

  elsif /^\/openurl.asp/.match(path)
    if param.call('genre') == 'journal'
      # The ISSN is optional: without it only rtype/mime are reported.
      issn = param.call('issn')
      if issn
        data['print_identifier'] = issn
        data['unit_id']          = issn
      end

      data['rtype'] = 'TOC'
      data['mime']  = 'MISC'
    end

  elsif (match = /^\/static\/pdf\/([0-9]+)\/([a-zA-Z]{3})([^\/]+)\.pdf/.match(path))
    if param.call('ext') == '.pdf' || param.call('token2')
      # URI.unescape was removed in Ruby 3.0; the RFC 2396 parser provides the
      # same percent-decoding (and, unlike form decoding, leaves "+" alone).
      parser  = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
      # Drop the first decoded character, which separates the three-letter
      # type code from the identifier.
      decoded = parser.unescape(match[3])[1..-1]

      data['title_id'] = match[1]
      data['mime']     = 'PDF'
      data['unit_id']  = decoded

      case match[2]
      when 'art'
        data['unit_id'] = decoded.split('/')[1]
        data['doi']     = decoded
        data['rtype']   = 'ARTICLE'

      when 'chp'
        data['unit_id'] = decoded.split('/')[1]
        data['doi']     = decoded
        data['rtype']   = 'BOOK_SECTION'

      when 'bok'
        data['online_identifier'] = decoded
        data['rtype']             = 'BOOK'

      when 'bfm'
        data['online_identifier'] = decoded.split('/')[0]
        data['rtype']             = 'TOC'

      else
        data['rtype'] = 'TOC'
      end
    end

  elsif (match = /^\/(download|static)\/([a-z]+)\/(([0-9.]*)\/([^\/]*)).epub/.match(path))
    if /([0-9]+)\.([0-9]+)/.match(match[4])
      data['doi']              = match[3]
      data['unit_id']          = match[5] + '.epub'
      data['print_identifier'] = match[5]
    elsif (identifier = /([\w%]*)[A-Z]{1}([0-9-]+)/.match(match[5]))
      data['unit_id']          = identifier[2] + '.epub'
      data['print_identifier'] = identifier[2]
    end

    data['rtype'] = 'BOOK'
    data['mime']  = 'EPUB'
  end

  # When the DOI contains "s<digits>-", those digits (leading zeros removed by
  # the integer round-trip) override any title_id set above.
  if data['doi']
    journal = /s([0-9]+)-/.match(data['doi'])
    data['title_id'] = Integer(journal[1], 10).to_s if journal
  end

  data
end