class EzproxyProviders::Wiley

Public Class Methods

parse(path, params) click to toggle source
# File lib/logstash/filters/ezproxy_providers/wiley.rb, line 4
# Extracts resource metadata from the path of a Wiley URL.
#
# The path is tried against a fixed sequence of patterns and the first one
# that matches decides which keys are filled in. The order of the branches
# matters: several later patterns are more general than earlier ones.
# All patterns are case-insensitive, so captured action segments
# ("pdf", "full", ...) are lower-cased before being compared.
#
# @param path [String] URL path to classify (a nil path matches nothing)
# @param params [Hash, nil] query parameters. Only "resource" is read, and
#   only for the "/readcube" path. NOTE(review): the value is assumed to be
#   an array whose first element is the DOI string - confirm against caller.
# @return [Hash] always contains "provider" => "wiley"; depending on the
#   matched pattern it may also contain "rtype", "mime", "doi", "unit_id",
#   "title_id", "online_identifier", "print_identifier", "vol", "issue"
#   and "publication_date". An unrecognized path yields only "provider".
def self.parse(path, params)
  data = { "provider" => "wiley" }

  if (match = /^\/pdf\/(10\.[0-9]+\/([0-9x]+))(\.ch[0-9]+)$/i.match(path))
    # /pdf/<doi>.chN
    data['rtype']   = 'BOOK_SECTION'
    data['mime']    = 'PDF'
    data['doi']     = match[1]
    data['unit_id'] = match[2] + match[3]
    data['online_identifier'] = match[2]

  elsif (match = /^\/doi(\/[a-z]+)?\/(10\.[0-9]+\/([a-z0-9._-]+))$/i.match(path))
    # /doi/<doi> or /doi/<action>/<doi>; the action segment is optional
    data['doi']     = match[2]
    data['unit_id'] = match[3]

    case match[1].to_s.downcase
    when '/pdf', '/epdf'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'PDF'
    when '/abs'
      data['rtype'] = 'ABS'
      data['mime']  = 'HTML'
    else
      # '/full', no action segment, or any other action
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'HTML'
    end

  elsif (match = /^\/toc\/toc\/(([0-9]+)\/([0-9]+)\/([0-9]+))$/i.match(path))
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'
    data['unit_id']  = match[1]
    data['title_id'] = match[2]
    data['vol']      = match[3]
    data['issue']    = match[4]

  elsif (match = /^\/journal\/([0-9]+)$/i.match(path))
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'
    data['title_id'] = match[1]

  elsif (match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path))
    # This pattern is not anchored, so it matches anywhere in the path.
    data['doi']     = match[1]
    data['unit_id'] = match[2]
    data['rtype']   = 'TOC'
    data['mime']    = 'MISC'
    data['online_identifier'] = match[3]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})\.[^.]+\.[^.]+))\/issuetoc$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'
    data['publication_date'] = match[4]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/abstract$/i.match(path))
    data['doi']     = match[1]
    data['unit_id'] = match[2]
    data['rtype']   = 'ABS'
    data['mime']    = 'MISC'
    data['online_identifier'] = match[3]
    data['publication_date']  = match[4]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})[0-9]+))\/abstract$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'ABS'
    data['mime']     = 'MISC'
    data['publication_date'] = match[4]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/full$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'HTML'

  elsif (match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/pdf$/i.match(path))
    data['doi']     = match[1]
    data['unit_id'] = match[2]
    data['rtype']   = 'ARTICLE'
    data['mime']    = 'PDF'
    data['online_identifier'] = match[3]
    data['publication_date']  = match[4]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/pdf$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'PDF'

  elsif (match = /^\/book\/(10\.[0-9]+\/([0-9]+))$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[2].upcase
    data['rtype']    = 'TOC'
    data['mime']     = 'MISC'
    data['print_identifier'] = match[2]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]+)\.[^.]+))\/pdf$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'BOOK_SECTION'
    data['mime']     = 'PDF'
    data['print_identifier'] = match[3]

  elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'HTML'

  elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([0-9]{4})([a-z0-9]{2})[a-z0-9]+))\/?$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[4].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'HTML'
    data['publication_date'] = match[3]

  elsif (match = /^\/agu\/issue\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['rtype']    = 'TOC'
    data['mime']     = 'HTML'

  elsif /^\/readcube$/i.match(path)
    data['rtype'] = 'ARTICLE'
    data['mime']  = 'READCUBE'

    # The DOI travels in the "resource" query parameter. A missing params
    # object, or a nil/empty "resource" value, leaves the DOI fields unset
    # instead of raising.
    resource = nil
    if params.respond_to?(:key?) && params.key?("resource")
      resource = Array(params["resource"]).first
    end

    if resource.is_a?(String)
      data['doi']     = resource
      # nil when the resource holds no "/" separated second segment
      data['unit_id'] = resource.split('/')[1]

      if (match = /(10\.[0-9]+)\/([0-9]{4})([a-z0-9]{2})([^\/]+)$/i.match(resource))
        data['title_id'] = match[3].upcase
      end
    end

  elsif (match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[a-z0-9]+))\/pdf$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[2].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'PDF'
    # the two captured digits are treated as a year of the 2000s
    data['publication_date'] = '20' + match[3]

  elsif (match = /^\/iucr\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[2].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = 'HTML'
    data['publication_date'] = '20' + match[3]

  elsif (match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))\/([a-z]+)$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[2].upcase
    data['publication_date'] = '20' + match[3]

    # Any other action leaves rtype and mime unset.
    case match[4].downcase
    when 'abstract'
      data['rtype'] = 'ABS'
      data['mime']  = 'MISC'
    when 'pdf'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'PDF'
    when 'full'
      # full text is an HTML article, as in every other "full" branch
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'HTML'
    end

  elsif (match = /^\/store\/(10\.[0-9]+\/(([a-z]+)\.([0-9]{4})[0-9]+))\/asset\/[a-z]+[0-9]+\.pdf$/i.match(path))
    # This branch sets no rtype.
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[3].upcase
    data['mime']     = 'PDF'
    data['publication_date'] = match[4]

  elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]{2,4})([a-z]+)[0-9]+))\/(pdf|full)$/i.match(path))
    data['doi']      = match[1]
    data['unit_id']  = match[2]
    data['title_id'] = match[4].upcase
    data['rtype']    = 'ARTICLE'
    data['mime']     = match[5].downcase == 'pdf' ? 'PDF' : 'HTML'
    # a two-digit prefix is expanded to a year of the 1900s
    data['publication_date'] = match[3].length == 2 ? '19' + match[3] : match[3]

  elsif (match = /^\/doi\/(10\.[0-9]+\/([^.]+))\/(pdf|full)$/i.match(path))
    # Fallback for DOIs whose suffix contains no dot.
    data['doi']     = match[1]
    data['unit_id'] = match[2]
    data['rtype']   = 'ARTICLE'
    data['mime']    = match[3].downcase == 'pdf' ? 'PDF' : 'HTML'
  end

  data
end