module XLSX2Shape

Public Instance Methods

escape_turtle(str) click to toggle source
# File lib/xlsx2shape.rb, line 119
# Escapes text for embedding inside a double-quoted ("...") Turtle string
# literal.
#
# Backslashes and double quotes are backslash-escaped. Line feeds and
# carriage returns are rewritten as the two-character escapes \n and \r,
# because the Turtle grammar does not permit them raw inside a "..."
# literal (only inside the long """...""" form).
#
# @param str [String] raw text
# @return [String] a new, escaped string; +str+ itself is not modified
def escape_turtle(str)
  replacements = {
    "\\" => "\\\\",
    '"'  => '\\"',
    "\n" => '\\n',
    "\r" => '\\r'
  }
  # Single pass with a Hash replacement: values are inserted literally, so
  # the backslashes in them are not re-interpreted as back-references.
  str.gsub(/[\\"\n\r]/, replacements)
end
format_property(property, value, lang = nil) click to toggle source
# File lib/xlsx2shape.rb, line 106
# Formats one "predicate object(s)" fragment of a Turtle statement.
#
# Every value is rendered through format_pvalue. When +value+ is an Array,
# the rendered forms are sorted and joined with ", " so the output order
# does not depend on the input order.
#
# @param property [String] predicate text, emitted verbatim
# @param value [Object, Array] a single value or a list of values
# @param lang [String, nil] optional language tag; forwarded to
#   format_pvalue for a single value and for every element of an Array
# @return [String] the fragment, indented by two spaces
def format_property(property, value, lang = nil)
  objects =
    if value.is_a?(Array)
      # Render each element once, then order by the rendered form.
      value.map { |element| format_pvalue(element, lang) }.sort.join(", ")
    else
      format_pvalue(value, lang)
    end
  %Q|  #{property} #{objects}|
end
format_pvalue(value, lang = nil) click to toggle source
# File lib/xlsx2shape.rb, line 80
# Renders a single value as a Turtle object term.
#
# Rules, checked in this order:
# * Hash            -> blank node "[ ... ]" with one format_property line per
#                      key, keys in sorted order
# * Integer, Float  -> returned unchanged (appears as a bare numeric literal
#                      once interpolated)
# * true / false    -> "true" / "false"
# * anything else is converted with to_s and then:
#   * text starting with http:// or https:// -> <IRI>
#   * text shaped like prefix:local          -> emitted verbatim
#   * text shaped like lexical^^prefix:type  -> "lexical"^^prefix:type
#   * otherwise a quoted string, with @lang appended when +lang+ is given
#
# @param value [Object] the value to render
# @param lang [String, nil] language tag, used only for plain strings
# @return [String, Integer, Float] the rendered term
def format_pvalue(value, lang = nil)
  case value
  when Hash
    lines = value.keys.sort.map { |key| format_property(key, value[key]) }
    return ["[", lines.join(";\n"), "  ]"].join("\n")
  when Integer, Float
    return value
  when true, false
    return value.to_s
  end

  # Coerce so values that are not Strings (nil, dates, ...) are rendered as
  # text instead of raising NoMethodError on =~ / gsub.
  text = value.to_s
  if text =~ /\Ahttps?:\/\//
    %Q|<#{text}>|
  elsif text =~ /\A\w+:[\w\-\.]+\Z/
    text
  elsif (typed = text.match(/\A(.+?)\^\^(\w+:\w+)\z/))
    %Q|"#{escape_turtle(typed[1])}"^^#{typed[2]}|
  elsif lang
    %Q|"#{escape_turtle(text)}"@#{lang}|
  else
    %Q|"#{escape_turtle(text)}"|
  end
end
map_xlsx_row_headers(data_row, headers) click to toggle source
# File lib/xlsx2shape.rb, line 73
# Pairs each header with the cell at the same position in +data_row+.
#
# Cells are converted with to_s, so a missing or nil cell becomes "".
# If a header occurs more than once, the last occurrence wins.
#
# @param data_row [Array] cells of one row
# @param headers [Array] header cells, used as the hash keys
# @return [Hash] header => cell text
def map_xlsx_row_headers(data_row, headers)
  headers.each_with_index.each_with_object({}) do |(header, position), mapped|
    mapped[header] = data_row[position].to_s
  end
end
xlsx2shape(filename) click to toggle source
# File lib/xlsx2shape.rb, line 6
# Converts an Excel workbook describing SHACL shapes into Turtle text.
#
# * A worksheet named "prefix" (case-insensitive) supplies namespace
#   declarations: first column is the prefix name, second the namespace.
#   The "sh" prefix is predeclared.
# * Every other worksheet yields one sh:NodeShape. The first cell of its
#   first row is used as the shape IRI; the remaining cells of that row are
#   the column headers applied to "sh:property" rows.
# * Rows are dispatched on their first cell: "sh:targetClass",
#   "sh:property" and "sh:or" are handled, all other rows are ignored.
# * Each "sh:property" row also receives an sh:order, counted from 1 within
#   its worksheet.
#
# @param filename [String] workbook path, handed to Roo::Excelx.new
# @return [String] @prefix declarations (sorted) followed by the shapes
#   (sorted by IRI)
def xlsx2shape(filename)
  shapes = {}
  prefix = { sh: "http://www.w3.org/ns/shacl#" }
  xlsx = Roo::Excelx.new(filename)
  xlsx.each_with_pagename do |name, sheet|
    if name =~ /\Aprefix\z/i
      sheet.each do |row|
        # A blank cell may be nil rather than "", so coerce before testing.
        next if row[1].to_s.empty?
        prefix[row[0].to_s.intern] = row[1]
      end
    else
      headers = sheet.row(1)
      uri = headers.first
      shapes[uri] = ["<#{uri}> a sh:NodeShape"]
      order = 1
      sheet.each_with_index do |row, _idx|
        row_h = map_xlsx_row_headers(row, headers)
        case row.first
        when "sh:targetClass"
          shapes[uri] << format_property("sh:targetClass", row[1]) if row[1]
        when "sh:property"
          prop_values = []
          headers[1..-1].each do |prop|
            next if row_h[prop].empty?
            case prop
            when /\@(\w+)\z/
              # Header of the form "property@lang": split off the language tag.
              lang = $1
              property_name = prop.sub(/\@(\w+)\z/, "")
              prop_values << format_property(property_name, row_h[prop], lang)
            when "sh:minCount", "sh:maxCount"
              prop_values << format_property(prop, row_h[prop].to_i)
            when "sh:languageIn"
              prop_values << "  sh:languageIn (#{row_h[prop].split.map{|e| format_pvalue(e) }.join(" ")})"
            when "sh:uniqueLang"
              case row_h[prop]
              when "true"
                prop_values << "  sh:uniqueLang true"
              when "false"
                prop_values << "  sh:uniqueLang false"
              else
                logger.warn "sh:uniqueLang value unknown: #{row_h[prop]} at #{uri}"
              end
            else
              prop_values << format_property(prop, row_h[prop])
            end
          end
          prop_values << format_property("sh:order", order)
          order += 1
          str = prop_values.join(";\n  ")
          shapes[uri] << "  sh:property [\n  #{str}\n  ]"
        when "sh:or"
          # Skip blank cells; to_s keeps nil cells from raising on empty?.
          members = row[1..-1].reject { |e| e.to_s.empty? }.map { |e| format_pvalue(e) }
          shapes[uri] << "  sh:or (#{members.join(" ")})"
        end
      end
    end
  end
  result = +""
  prefix.sort_by { |k, v| [k, v] }.each do |pfx, namespace|
    result << "@prefix #{pfx}: <#{namespace}>.\n"
  end
  shapes.sort_by { |shape_uri, _| shape_uri }.each do |_shape_uri, statements|
    result << "\n"
    result << statements.join(";\n")
    result << ".\n"
  end
  result
end