class EzproxyProviders::LexisWebAnalytics

Public Class Methods

parse(path, params, uri)
# File lib/logstash/filters/ezproxy_providers/lexis_webanalytics.rb, line 5
# Builds the analytics record for a LexisNexis web-analytics request.
#
# Three request shapes are recognised from +path+:
# * <tt>/Document/<id>/<slug></tt> -- a table of contents (TOC) page
# * <tt>/Docview.aspx</tt>         -- an article page described by the
#   JSON-encoded +citationData+ query parameter
# * <tt>/wa_k4c.watag</tt>         -- a tracking request whose
#   <tt>wa_*</tt> parameters are read from the raw URL
#
# Missing or malformed query parameters never raise: the fields that
# cannot be determined are simply left out of the result.
#
# @param path   [String] request path, matched against the shapes above
# @param params [Hash] query parameters; each value is indexed with
#   <tt>[0]</tt> to obtain the parameter value
# @param uri    [#to_s] full request URI
# @return [Hash] always contains "provider"; may also contain "rtype",
#   "mime", "unit_id" and "title_id"
def self.parse(path, params, uri)
  data = {
    "provider" => "lexisnexis_webanalytics"
  }

  if (document = /^\/Document\/([\w]+)\/([\w-]+)$/i.match(path))
    data['rtype'] = 'TOC'
    data['mime'] = 'HTML'

    # Only read the first value when the parameter is actually present.
    rnd_num = params && params['rndNum']
    data['unit_id'] = rnd_num[0] if rnd_num

    title = /([a-z_]+)_(\d+_\w+_\d+)_n_(\d+)/i.match(document[1])
    data['title_id'] = title[1] if title

  elsif /^\/Docview\.aspx$/i.match(path)
    data['rtype'] = 'ARTICLE'
    data['mime'] = 'HTML'

    doc_id = citation_doc_id(params)
    unless doc_id.nil?
      data['unit_id'] = doc_id
      # The title identifier is the second "_"-separated token of docId.
      data['title_id'] = doc_id.split('_')[1] if doc_id.respond_to?(:split)
    end

  elsif /^\/wa_k4c\.watag$/i.match(path)
    parse_watag(uri.to_s, data)
  end

  data
end

# Internal helper for parse: extracts "docId" from the JSON document held
# in the first value of the +citationData+ query parameter.
#
# @param params [Hash, nil] query parameters as passed to parse
# @return [Object, nil] the decoded "docId" value, or nil when the
#   parameter is missing, is not valid JSON, or is not a JSON object
def self.citation_doc_id(params)
  values = params && params['citationData']
  return nil unless values && values[0]

  citation = JSON.parse(values[0])
  citation.is_a?(Hash) ? citation['docId'] : nil
rescue JSON::ParserError, TypeError
  nil
end

# Internal helper for parse: fills +data+ from the <tt>wa_*</tt>
# parameters of a <tt>/wa_k4c.watag</tt> request.
#
# Each parameter is only recognised when it is delimited by "&" on both
# sides inside +raw_url+.
#
# @param raw_url [String] full request URL
# @param data    [Hash] result hash, updated in place
# @return [Hash] +data+
def self.parse_watag(raw_url, data)
  if (doc_id = /&wa_DocId=([0-9a-zA-Z_-]+)&/i.match(raw_url))
    data['unit_id'] = doc_id[1]
    title = /PS_([A-Z]+)/.match(data['unit_id'])
    data['title_id'] = title[1] if title
  end

  source = /&wa_DocSourceType=([0-9a-z%é_]+)&/i.match(raw_url)
  source_type = source && source[1]

  action = /&wa_UserAction=([a-zA-Z]+)&/i.match(raw_url)
  viewing = !action.nil? && %w[ViewDoc ChangeToc].include?(action[1])

  rtype =
    case source_type
    when 'FicheMethodo', 'FicheRevision' then 'ENCYCLOPAEDIA_ENTRY'
    when 'PresseSommaire' then 'TOC'
    when 'Presse' then 'ARTICLE'
    when 'En_eFascicule'
      # Only counted for the ViewDoc / ChangeToc user actions.
      'ENCYCLOPAEDIA_ENTRY' if viewing
    when /L[é%C3A9]+gislationconsolid[é%C3A9]+e/
      # The character class accepts both a literal "é" and its
      # percent-encoded form "%C3%A9".
      'CODES' if viewing
    end

  if rtype
    data['rtype'] = rtype
    data['mime'] = 'HTML'
  end

  data
end