class MusicStory::Repository::ArtistXMLFile

Parses an XML file of MusicStory artiste objects. The top-level structure should be <items>…<artistes><artiste>…</artiste>…<artiste>…</artiste></artistes></items>.

A formal XSD doesn’t appear to exist, so this is based entirely on data seen so far, together with some small pieces of info (such as the ARTIST_GENRE_RELATIONS and ASSOCIATION_TYPES) gleaned from a brief PDF doc in franglais (descriptionxml_en.pdf).

Some elements mentioned in the PDF (such as collaboration, album, evenement etc) haven’t been seen so far in artist XML files so aren’t handled.

Constants

ARTIST_GENRE_RELATIONS

Codes used in their XML file format:

ASSOCIATION_TYPES

Public Class Methods

new(io) click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 13
# Builds the repository around an IO-like object containing the XML.
#
# io:: the object handed to Nokogiri::XML::Reader.from_io; the resulting
#      reader is stored in @reader and iterated later by #each.
def initialize(io)
  xml_reader = Nokogiri::XML::Reader.from_io(io)
  @reader = xml_reader
end
new_with_open_file(filename) { |new(file)| ... } click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 17
# Opens +filename+ for reading, wraps the open file in a new instance and
# yields that instance to the caller's block. The block form of File.open
# is used, so the file is closed once the block has finished.
#
# filename:: path of the XML file to open
#
# Returns whatever the caller's block returns.
def self.new_with_open_file(filename, &block)
  File.open(filename, 'r') { |handle| yield new(handle) }
end

Public Instance Methods

each() { |artist({ :id => to_i_or_nil(xpath('//artiste').attr('id').value), :name => xpath('//artiste/nom').inner_text, :forename => unless_empty(xpath('//artiste/prenom').inner_text), :real_name => unless_empty(xpath('//artiste/nom_reel').inner_text), :role => unless_empty(xpath('//artiste/role').inner_text), :type => unless_empty(xpath('//artiste/type').inner_text), :country => unless_empty(xpath('//artiste/pays').inner_text), :summary_html => unless_empty(xpath('//artiste/resume').inner_text), :image_filename => unless_empty(xpath('//artiste/image').inner_text), :bio_html => unless_empty(xpath('//artiste/texte_bio').inner_text), :main_genres => genres, :secondary_genres => genres, :influenced_by_genres => genres, :similar_artists => associations, :influenced_by_artists => associations, :successor_artists => associations| ... } click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 39
# Walks the XML reader and yields one Model::Artist for every <artiste>
# start element encountered.
#
# For each element the genres are grouped by their relation code (looked up
# in ARTIST_GENRE_RELATIONS) and the associated artists by their association
# type (looked up in ASSOCIATION_TYPES). Associated artists that fail
# #invalid_artist? are silently dropped.
#
# Returns an Enumerator when called without a block, instead of raising
# LocalJumpError part-way through the reader.
def each
  return enum_for(:each) unless block_given?

  @reader.each do |node|
    next unless node.name == 'artiste' && node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    # Re-parse only this element so that XPath queries can be run against it.
    doc = Nokogiri::XML(node.outer_xml)

    # extract genres (the default block gives every relation its own array)
    genres = Hash.new { |h, k| h[k] = [] }
    genres_and_relation = doc.xpath('//artiste/genres/genre').map do |genre_node|
      genre = Model::Genre.new(
        :id   => to_i_or_nil(genre_node.attr('id')),
        :name => genre_node.inner_text.strip
      )
      [genre, ARTIST_GENRE_RELATIONS[to_i_or_nil(genre_node.attr('relation'))]]
    end

    genres_and_relation.uniq.each do |genre, relation|
      genres[relation] << genre
    end

    # extract associations
    associations = Hash.new { |h, k| h[k] = [] }
    associated_artists_and_type = doc.xpath('//artiste/associes/associe').map do |associate_node|
      artist = Model::Artist.new({
        :id   => to_i_or_nil(associate_node.attr('id_associe')),
        :name => associate_node.attr('nom_associe')
      })
      [artist, ASSOCIATION_TYPES[associate_node.inner_text]]
    end

    associated_artists_and_type.uniq.each do |artist, type|
      # FIXME track non-failing errors, rather than keeping quiet about it
      associations[type] << artist unless invalid_artist?(artist)
    end

    # NodeSet#attr returns nil when the attribute is absent, so guard before
    # asking for its value; to_i_or_nil then turns a missing id into nil.
    id_attr = doc.xpath('//artiste').attr('id')

    yield Model::Artist.new({
      :id        => to_i_or_nil(id_attr && id_attr.value),
      :name      => doc.xpath('//artiste/nom').inner_text,
      :forename  => unless_empty(doc.xpath('//artiste/prenom').inner_text),
      :real_name => unless_empty(doc.xpath('//artiste/nom_reel').inner_text),
      :role      => unless_empty(doc.xpath('//artiste/role').inner_text),
      :type      => unless_empty(doc.xpath('//artiste/type').inner_text),
      :country   => unless_empty(doc.xpath('//artiste/pays').inner_text),
      # not sure what the appropriate translation for resume vs texte_bio is here,
      # but in data seen so far they are both the same and both HTML not plain text:
      :summary_html          => unless_empty(doc.xpath('//artiste/resume').inner_text),
      :image_filename        => unless_empty(doc.xpath('//artiste/image').inner_text),

      :bio_html              => unless_empty(doc.xpath('//artiste/texte_bio').inner_text),
      :main_genres           => genres[:main],
      :secondary_genres      => genres[:secondary],
      :influenced_by_genres  => genres[:influenced_by],
      :similar_artists       => associations[:similar],
      :influenced_by_artists => associations[:influenced_by],
      :successor_artists     => associations[:successor]
    })
  end
end
get_all() click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 37
# Returns the receiver itself.
# NOTE(review): presumably this lets the object stand in wherever a
# collection of all artists is expected and then be iterated with #each;
# confirm against callers.
def get_all
  self
end

Private Instance Methods

invalid_artist?(artist) click to toggle source

basic check that core artist properties are there and correct

# File lib/music_story/repository/artist_xml_file.rb, line 104
# Basic sanity check on an artist's core properties.
#
# artist:: any object responding to +name+ and +id+
#
# Returns true when the name is nil or blank, or when the id is nil or its
# string form contains no digit; false otherwise.
def invalid_artist?(artist)
  label = artist.name
  return true if label.nil? || label.strip.empty?

  ident = artist.id
  ident.nil? || ident.to_s !~ /[0-9]+/
end
to_i_or_nil(value) click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 109
# Converts +value+ to an Integer, or returns nil when it does not start with
# a number.
#
# The check is anchored to the start of the string form (optional leading
# whitespace and sign, then a digit), which is exactly where #to_i begins
# parsing. An unanchored digit search would accept input such as "abc12",
# for which #to_i yields a meaningless 0.
#
# value:: anything responding to +to_s+ and +to_i+ (nil included)
def to_i_or_nil(value)
  value.to_i if value.to_s =~ /\A\s*[-+]?[0-9]/
end
unless_empty(string) click to toggle source
# File lib/music_story/repository/artist_xml_file.rb, line 98
# Strips surrounding whitespace from +string+ and returns the result, or nil
# when nothing is left after stripping.
#
# string:: a String (must respond to +strip+)
def unless_empty(string)
  trimmed = string.strip
  trimmed.empty? ? nil : trimmed
end