class PubmedAPI::XMLParser

Constants

AuthorStruct
JournalStruct
LinkStruct
PaperStruct
SearchResult

Public Instance Methods

parse_authors(authors) click to toggle source
# File lib/pubmed_api/parsers.rb, line 167
# Extracts the name parts of each author node.
#
# @param authors [Enumerable] author nodes, each responding to +at_css+
#   (presumably a Nokogiri NodeSet of Author elements -- confirm against callers)
# @return [Array<Array<String>>] one [fore_name, initials, last_name] triple per
#   node, in input order; a part whose element is absent is an empty string
def parse_authors(authors)
  authors.map do |node|
    %w[ForeName Initials LastName].map do |tag|
      element = node.at_css(tag)
      # Give every missing part its own String. Array.new(3, "") would place the
      # same object in all three slots, so an in-place append on one missing
      # field would silently change the others.
      element ? element.text : ''.dup
    end
  end
end
parse_journals(journals_xml) click to toggle source
# File lib/pubmed_api/parsers.rb, line 115
# Converts each journal node into a JournalStruct.
#
# The struct is filled positionally with the text of, in order: ISSN,
# NlmUniqueID, ./TitleMain/Title, MedlineTA, PublicationFirstYear, Frequency.
#
# @param journals_xml [Enumerable] journal nodes responding to +css+ and +xpath+
# @return [Array<JournalStruct>] one struct per node, in input order
def parse_journals(journals_xml)
  journals_xml.map do |journal|
    issn       = journal.css('ISSN').text
    nlm_id     = journal.css('NlmUniqueID').text
    main_title = journal.xpath('./TitleMain/Title').text
    medline_ta = journal.css('MedlineTA').text
    first_year = journal.css('PublicationFirstYear').text
    frequency  = journal.css('Frequency').text

    JournalStruct.new(issn, nlm_id, main_title, medline_ta, first_year, frequency)
  end
end
parse_papers(papers_xml) click to toggle source
# File lib/pubmed_api/parsers.rb, line 35
# Builds a PaperStruct for every PubmedArticle entry in +papers_xml+.
#
# Fields populated: title, abstract (only when an Abstract element exists),
# article_date, pmid, pubmed_date / date_appeared (from the "entrez"
# PubMedPubDate), doi, authors (as a display string) and nlmid.
#
# Optional elements are detected with explicit nil checks rather than by
# rescuing NoMethodError, so unrelated NoMethodErrors are no longer swallowed.
#
# @param papers_xml [Enumerable] paper nodes responding to +xpath+, +at+ and +css+
# @return [Array<PaperStruct>] one struct per accepted entry, in input order
# @raise [NoMethodError] if an accepted entry has no ArticleTitle, or its
#   "entrez" date lacks Year, Month or Day
def parse_papers(papers_xml)
  results = []

  papers_xml.each do |paper|
    # Only PubmedArticle records are parsed; everything else is skipped.
    # NOTE(review): '/*/*' is an absolute path, so this looks at the first child
    # of the document root rather than at +paper+ itself -- confirm that is the
    # intended check.
    next unless paper.xpath('/*/*').first.name.eql?('PubmedArticle')

    paper_output = PaperStruct.new
    paper_output.title = paper.at('ArticleTitle').text

    # The abstract is optional; the field stays unset when the element is absent.
    abstract = paper.at('Abstract')
    paper_output.abstract = abstract.text if abstract

    # ArticleDate in Y/M/D order. When any component is missing, fall back to
    # the default Date.new value.
    date_parts = %w[Year Month Day].map { |part| paper.at("ArticleDate/#{part}") }
    paper_output.article_date =
      if date_parts.all?
        Date.new(*date_parts.map { |part| part.text.to_i })
      else
        Date.new
      end

    paper_output.pmid = parse_pmid(paper.css('PMID').text)

    # Of the multiple PubMedPubDate entries only those with PubStatus "entrez"
    # are used; they feed both pubmed_date and date_appeared.
    paper.css('PubMedPubDate').each do |node|
      next unless node.attributes['PubStatus'].to_s == 'entrez'

      entrez_date = Date.new(node.at('Year').text.to_i, node.at('Month').text.to_i, node.at('Day').text.to_i)
      paper_output.pubmed_date = entrez_date
      paper_output.date_appeared = entrez_date
    end

    # If several ArticleId entries are typed "doi", the last one wins.
    paper.css('ArticleId').each do |node|
      paper_output.doi = node.text if node.attributes['IdType'].to_s == 'doi'
    end

    # Authors are rendered as a friendly "Initials LastName, ..." string for now.
    # TODO: handle authors properly
    author_names = parse_authors(paper.css('Author')).map { |name_parts| "#{name_parts[1]} #{name_parts[2]}" }
    paper_output.authors = author_names.join(', ')

    paper_output.nlmid = paper.css('NlmUniqueID').text

    results << paper_output
  end

  results
end
parse_pmid(pmid) click to toggle source
# File lib/pubmed_api/parsers.rb, line 128
# Cleans up a PMID string: removes every '.' character and, if more than eight
# characters remain, keeps only the first eight.
#
# @param pmid [String] raw PMID text
# @return [String] the cleaned identifier, at most eight characters long
def parse_pmid(pmid)
  without_dots = pmid.delete('.')
  without_dots.length > 8 ? without_dots[0, 8] : without_dots
end