class XData::FileReader

Constants

GEOMETRIES
RE_A_NAME
RE_GEO
RE_NAME
RE_X
RE_Y

Attributes

content[R]
file[R]
params[R]

Public Class Methods

new(pars) click to toggle source
# File lib/xdata/file_reader.rb, line 85
# Builds a reader for the source named by pars[:file_path] and loads it.
#
# pars - Hash of options, kept as @params. Keys read here:
#        :file_path    - local path, or an http(s) URL (handled by download)
#        :originalfile - optional original file name; when present, its
#                        extension (rather than file_path's) picks the reader
#
# Raises RuntimeError when no reader exists for the extension.
def initialize(pars)
  @params = pars

  # \A (not ^) so only a value that *starts* with the scheme counts as a URL.
  if @params[:file_path] =~ %r{\Ahttps?://.+}
    download
  else
    file_path = File.expand_path(@params[:file_path])
    if File.extname(file_path) == '.xdata'
      read_xdata(file_path)
    else
      ext = @params[:originalfile] ? File.extname(@params[:originalfile]) : File.extname(file_path)
      case ext
        when /\.zip/i
          read_zip(file_path)
        when /\.(geo)?json/i
          read_json(file_path)
        when /\.shp/i
          read_shapefile(file_path)
        # Grouped alternation: the ungrouped /\.csv|tsv/ meant "\.csv" OR "tsv".
        when /\.(csv|tsv)/i
          read_csv(file_path)
        when /\.xdata/i
          read_xdata(file_path)
        when /\.xml/i
          read_xml(file_path)
        else
          raise "Unknown or unsupported file type: #{ext}."
      end
    end
  end
  fillOut
end

Public Instance Methods

download() click to toggle source
# File lib/xdata/file_reader.rb, line 55
# Fetches @params[:file_path] over HTTP(S) and passes the payload to a reader.
#
# URLs containing "/ODataFeed/" are requested as JSON (see odata_json) and
# given to read_json. Any other URL is read fully, then dispatched on a
# ".csv"/".zip" suffix, on an XML declaration in the body, or otherwise
# parsed as JSON. fillOut runs afterwards.
#
# Returns early (without calling fillOut) when the JSON fallback raises
# XData::Exception.
#
# The URL is opened through URI#open (open-uri) rather than Kernel#open:
# Kernel#open treats a string starting with "|" as a command to run and no
# longer opens URLs at all on Ruby 3.
def download()
  url = @params[:file_path]
  if url =~ /\/ODataFeed\//
    URI.parse(odata_json(url)).open do |f|
      read_json(nil, XData::parse_json(f.read))
    end
  else
    data = URI.parse(url).open { |f| f.read }
    if url =~ /\.csv$/i
      read_csv(nil, data)
    elsif url =~ /\.zip$/i
      read_zip(nil, data)
    elsif data =~ /^\s*<\?xml/
      read_xml(nil, data)
    else
      begin
        read_json(nil, XData::parse_json(data))
      rescue XData::Exception
        return
      end
    end
  end
  fillOut
end
fillOut() click to toggle source
# File lib/xdata/file_reader.rb, line 34
# Completes @params with everything that can be derived from @content.
#
# Always sets :rowcount; each remaining step runs only when its @params key
# is not already set (:fields, :name, :srid, :unique_id, :hasaddress,
# :bounds).
def fillOut
  # Some readers (read_xml, a failed download) never assign @content; treat
  # that as "no rows" instead of raising on nil.length.
  @content ||= []
  @params[:rowcount] = @content.length
  get_fields          unless @params[:fields]
  guess_name          unless @params[:name]
  guess_srid          unless @params[:srid]
  find_unique_field   unless @params[:unique_id]
  get_address         unless @params[:hasaddress]
  findExtends         unless @params[:bounds]
end
findExtends() click to toggle source
# File lib/xdata/file_reader.rb, line 270
# Stores the bounding polygon of the loaded rows in @params[:bounds].
#
# When @params[:hasgeometry] is set, every row's :geometry is used (a type of
# 'Multipolygon' is first rewritten to 'MultiPolygon'). Otherwise, when
# @params[:postcode] names a property, each row's value is looked up through
# PC4.lookup and the first two entries of a hit are added as points.
# Does nothing (returns nil) when neither key is set.
def findExtends
  srid = @params[:srid] || '4326'

  if @params[:hasgeometry]
    shapes = @content.map do |feature|
      geometry = feature[:geometry]
      geometry[:type] = 'MultiPolygon' if geometry[:type] == 'Multipolygon'
      Geometry.from_geojson(geometry.to_json)
    end
  elsif @params[:postcode]
    key = @params[:postcode].to_sym
    shapes = []
    @content.each do |feature|
      hit = PC4.lookup(feature[:properties][key])
      next unless hit
      [0, 1].each do |i|
        shapes << GeoRuby::SimpleFeatures::Point.from_coordinates(hit[i], srid)
      end
    end
  else
    return
  end

  collection = GeometryCollection.from_geometries(shapes, srid)
  @params[:bounds] = XData.toPolygon(collection.bounding_box())
end
find_col_sep(f) click to toggle source
# File lib/xdata/file_reader.rb, line 197
# Guesses the column separator from the first two lines of an IO.
#
# f - an IO-like object responding to gets.
#
# Tries ";", tab and "|" in that order and returns the first one that splits
# the first line into more than one column and gives the second line the same
# column count. With only one line available, the second-line check is
# skipped; with no lines at all, or no candidate matching, returns ",".
def find_col_sep(f)
  first = f.gets
  return ',' if first.nil?
  second = f.gets

  [";", "\t", "|"].each do |sep|
    columns = first.split(sep).length
    next unless columns > 1
    return sep if second.nil? || second.split(sep).length == columns
  end
  ','
end
find_geometry(xfield=nil, yfield=nil) click to toggle source
# File lib/xdata/file_reader.rb, line 294
# Finds geometry in the loaded rows and moves it into each row's :geometry.
#
# xfield, yfield - optional property keys holding x and y. When both are
#                  given, column detection is skipped. When they are the same
#                  key, that column is parsed as text via geom_from_text.
#
# Detection looks at the first row's properties only:
#   * a key matching RE_GEO whose value is accepted by is_wkb_geometry?,
#     is_wkt_geometry? or is_geo_json? (tried in that order);
#   * otherwise x/y columns: exact names (longitude/lon/x, latitude/lat/y)
#     win over RE_X / RE_Y matches.
#
# Sets @params[:hasgeometry], @params[:geometry_type] and, for geometry
# columns, @params[:srid]. Source columns are removed from the rows unless
# @params[:keep_geom] is true.
#
# Returns true when geometry was found, false when not, nil when there are
# no rows.
def find_geometry(xfield=nil, yfield=nil)
  delete_column = (@params[:keep_geom] != true)
  return if @content.nil? || @content.empty?
  unless xfield && yfield
    @params[:hasgeometry] = nil
    xs = true
    ys = true

    @content[0][:properties].each do |k,v|
      next if k.nil?

      if k.to_s =~ RE_GEO
        srid,g_type = is_wkb_geometry?(v)
        if srid
          @params[:srid] = srid
          @params[:geometry_type] = g_type
          @content.each do |h|
            _,_,g = is_wkb_geometry?(h[:properties][k])
            h[:geometry] = g
            h[:properties].delete(k) if delete_column
          end
          @params[:hasgeometry] = k
          return true
        end

        srid,g_type = is_wkt_geometry?(v)
        if srid
          @params[:srid] = srid
          @params[:geometry_type] = g_type
          @content.each do |h|
            _,_,g = is_wkt_geometry?(h[:properties][k])
            h[:geometry] = g
            h[:properties].delete(k) if delete_column
          end
          @params[:hasgeometry] = k
          return true
        end

        srid,g_type = is_geo_json?(v)
        if srid
          @params[:srid] = srid
          @params[:geometry_type] = g_type
          @content.each do |h|
            h[:geometry] = h[:properties][k]
            h[:properties].delete(k) if delete_column
          end
          @params[:hasgeometry] = k
          return true
        end

      end

      hdc = k.to_s.downcase
      # An exact name locks the choice; xs/ys then block later regex matches.
      if %w[longitude lon x].include?(hdc)
        xfield = k
        xs = false
      end
      if %w[latitude lat y].include?(hdc)
        yfield = k
        ys = false
      end
      xfield = k if xs && (hdc =~ RE_X)
      yfield = k if ys && (hdc =~ RE_Y)
    end
  end

  if xfield && yfield && (xfield != yfield)
    @params[:hasgeometry] = [xfield,yfield]
    @content.each do |h|
      h[:properties][xfield] = h[:properties][xfield] || ''
      h[:properties][yfield] = h[:properties][yfield] || ''
      # to_s first: JSON/OData rows carry numbers, which have no gsub.
      # A decimal comma is accepted as a decimal point.
      x = h[:properties][xfield].to_s.gsub(',','.').to_f
      y = h[:properties][yfield].to_s.gsub(',','.').to_f
      h[:geometry] = {:type => 'Point', :coordinates => [x, y]}
      h[:properties].delete(yfield) if delete_column
      h[:properties].delete(xfield) if delete_column
    end
    @params[:geometry_type] = 'Point'
    @params[:fields].delete(xfield) if @params[:fields] && delete_column
    @params[:fields].delete(yfield) if @params[:fields] && delete_column
    return true
  elsif xfield && yfield
    # Same key for x and y: the column holds geometry as text.
    @params[:hasgeometry] = [xfield]
    @content.each do |h|
      h[:geometry] = geom_from_text(h[:properties][xfield])
      h[:properties].delete(xfield) if h[:geometry] && delete_column
    end
    @params[:geometry_type] = ''
    @params[:fields].delete(xfield) if @params[:fields] && delete_column
    return true
  end
  false
end
find_unique_field() click to toggle source
# File lib/xdata/file_reader.rb, line 134
# Picks a property whose values are all different and records its key in
# @params[:unique_id] (nil when there is none).
#
# A property qualifies when its number of distinct values equals
# @params[:rowcount]; the first qualifying key, in order of first
# appearance, is chosen.
def find_unique_field
  @params[:unique_id] = nil

  tallies = {}
  @content.each do |row|
    row[:properties].each do |key, value|
      (tallies[key] ||= Hash.new(0))[value] += 1
    end
  end

  tallies.each_key do |key|
    next unless tallies[key].length == @params[:rowcount]
    @params[:unique_id] = key
    break
  end

end
geom_from_text(coords) click to toggle source
# File lib/xdata/file_reader.rb, line 251
# Converts WKT-style text such as "POINT (4.9 52.3)" into a GeoJSON-shaped
# Hash with :type and :coordinates.
#
# coords - the text to parse; anything that is not a String yields {}.
#
# Only 2D coordinate pairs are understood. Returns {} when the leading word
# is not listed in GEOMETRIES or the coordinate text cannot be parsed.
def geom_from_text(coords)
  return {} unless coords.is_a?(String)

  match = /^(\w+)(.+)/.match(coords)
  return {} unless match

  kind = match[1].downcase
  return {} unless GEOMETRIES.include?(kind)

  # GeoJSON type names are CamelCase; String#capitalize would give
  # "Linestring" / "Multipolygon".
  geojson_names = {
    'point'              => 'Point',
    'multipoint'         => 'MultiPoint',
    'linestring'         => 'LineString',
    'multilinestring'    => 'MultiLineString',
    'polygon'            => 'Polygon',
    'multipolygon'       => 'MultiPolygon',
    'geometrycollection' => 'GeometryCollection'
  }
  type = geojson_names.fetch(kind) { match[1].capitalize }

  # Turn "(x y, x y)" into the JSON array text "[[x,y], [x,y]]".
  coor = match[2].gsub('(','[').gsub(')',']')
  coor = coor.gsub(/([-+]?[0-9]*\.?[0-9]+)\s+([-+]?[0-9]*\.?[0-9]+)/) { "[#{$1},#{$2}]" }
  coor = JSON.parse(coor)

  # "POINT (x y)" parses to [[x, y]]; a GeoJSON Point holds a single position.
  coor = coor.first if type == 'Point' && coor.is_a?(Array) && coor.first.is_a?(Array)

  { :type => type, :coordinates => coor }
rescue JSON::ParserError
  {}
end
get_address() click to toggle source
# File lib/xdata/file_reader.rb, line 117
# Looks for postcode / house number / street columns by field name.
#
# Resets @params[:housenumber], @params[:hasaddress] ('unknown') and
# @params[:postcode], then scans @params[:fields] back to front so that the
# earliest matching field ends up selected. :hasaddress becomes 'certain'
# when a postcode field is found together with a number or street field.
# :postcode and :housenumber are left as false when nothing matched;
# :housenumber falls back to the street field.
def get_address
  @params[:housenumber] = nil
  @params[:hasaddress] = 'unknown'
  @params[:postcode] = nil

  postcode = number = street = false
  @params[:fields].reverse_each do |field|
    label = field.to_s
    postcode = field if label =~ /^(post|zip|postal)code.*/i
    number   = field if label =~ /huisnummer|housenumber|(house|huis)(nr|no)|number/i
    street   = field if label =~ /address|street|straat|adres/i
  end

  @params[:hasaddress] = 'certain' if postcode && (number || street)
  @params[:postcode] = postcode
  @params[:housenumber] = number || street
end
get_fields() click to toggle source
# File lib/xdata/file_reader.rb, line 166
# Rebuilds @params[:fields] and @params[:alternate_fields] from the property
# keys of the first row.
#
# Keys are converted to symbols where possible and kept as-is otherwise.
# :alternate_fields maps every field to itself. Both are left empty when
# there are no rows.
def get_fields
  @params[:fields] = []
  @params[:alternate_fields] = {}
  return if @content.blank?

  @content[0][:properties].each_key do |key|
    name = begin
      key.to_sym
    rescue
      key
    end
    name ||= key
    @params[:fields].push(name)
    @params[:alternate_fields][name] = name
  end
end
guess_name() click to toggle source
# File lib/xdata/file_reader.rb, line 153
# Chooses the field most likely to hold a row's name and stores it in
# @params[:name] (nil when nothing matches).
#
# Fields are scanned back to front. The first field matching RE_A_NAME ends
# the search immediately; otherwise every RE_NAME match overwrites the
# previous one, so the earliest RE_NAME field wins.
def guess_name
  @params[:name] = nil
  @params[:fields].reverse.each do |field|
    label = field.to_s
    if label =~ RE_A_NAME
      @params[:name] = field
      return
    end
    @params[:name] = field if label =~ RE_NAME
  end
end
guess_srid() click to toggle source
# File lib/xdata/file_reader.rb, line 177
# Guesses the spatial reference of the data from the first row's geometry.
#
# Does nothing when there are no rows or the first row has no Hash geometry.
# Otherwise @params[:srid] is set to 4326, or to 28992 when the first
# coordinate pair falls inside the hard-coded x/y ranges below (a rough test
# for the Dutch RD grid). Missing or non-numeric coordinates leave it at 4326.
def guess_srid
  return if @content.nil? || @content.empty?
  geometry = @content[0][:geometry]
  return unless geometry.is_a?(Hash)

  @params[:srid] = 4326

  g = geometry[:coordinates]
  return unless g.is_a?(Array)
  # Walk down to the innermost array: the first position of the geometry.
  g = g[0] while g[0].is_a?(Array)

  lon, lat = g[0], g[1]
  # Empty or malformed coordinates would otherwise raise in between?.
  return unless lon.is_a?(Numeric) && lat.is_a?(Numeric)

  if lon.between?(-7000.0,300000.0) && lat.between?(289000.0,629000.0)
    # Simple minded check for Dutch new rd system
    @params[:srid] = 28992
  end
end
is_geo_json?(s) click to toggle source
# File lib/xdata/file_reader.rb, line 231
# Tests whether s looks like a GeoJSON geometry object.
#
# s - candidate value; anything but a Hash is rejected.
#
# Returns [srid, type, s] when s[:type] (case-insensitively) is listed in
# GEOMETRIES, nil otherwise. srid defaults to 4326 and is taken from s[:crs]
# when that is of type 'OGC' (EPSG urn) or 'EPSG' (code property).
# Malformed crs data yields nil.
def is_geo_json?(s)
  return nil unless s.is_a?(Hash)

  type = s[:type]
  return nil unless type.respond_to?(:downcase) && GEOMETRIES.include?(type.downcase)

  srid = 4326
  crs = s[:crs]
  if crs && crs[:properties]
    case crs[:type]
    when 'OGC'
      # e.g. "urn:ogc:def:crs:EPSG::28992" - authority at index 4, code last.
      urn = crs[:properties][:urn].split(':')
      srid = urn.last.to_i if urn[4] == 'EPSG'
    when 'EPSG'
      srid = crs[:properties][:code]
    end
  end
  return srid, type, s
rescue StandardError
  # Best effort: an oddly shaped crs means "not GeoJSON". Interrupts and
  # exit requests are no longer swallowed here.
  nil
end
is_wkb_geometry?(s) click to toggle source
# File lib/xdata/file_reader.rb, line 206
# Tries to parse s with GeoRuby's HexEWKBParser.
#
# Returns [srid, type, geometry] on success, where type is read from the
# geometry's as_json output; returns nil when parsing raises any
# StandardError.
def is_wkb_geometry?(s)
  factory = GeoRuby::SimpleFeatures::GeometryFactory.new
  GeoRuby::SimpleFeatures::HexEWKBParser.new(factory).parse(s)
  geometry = factory.geometry
  [geometry.srid, geometry.as_json[:type], geometry]
rescue
  nil
end
is_wkt_geometry?(s) click to toggle source
# File lib/xdata/file_reader.rb, line 218
# Tries to parse s with GeoRuby's EWKTParser.
#
# Returns [srid, type, geometry] on success, where type is read from the
# geometry's as_json output; returns nil when parsing raises any
# StandardError.
def is_wkt_geometry?(s)
  factory = GeoRuby::SimpleFeatures::GeometryFactory.new
  GeoRuby::SimpleFeatures::EWKTParser.new(factory).parse(s)
  geometry = factory.geometry
  [geometry.srid, geometry.as_json[:type], geometry]
rescue
  nil
end
odata_json(url) click to toggle source
# File lib/xdata/file_reader.rb, line 44
def odata_json(url)
  if url =~ /\/ODataFeed\//
    uri = URI.parse(url)
    return url + '?$format=json' if uri.query.nil?
    pars = CGI.parse(uri.query)
    return url if pars["$format"]
    return url + '&$format=json'
  end
  return url
end
parseODataFields(props) click to toggle source
# File lib/xdata/file_reader.rb, line 494
# Copies per-field OData metadata into @params[:md].
#
# props - list of property descriptions; :Unit, :Description and :Key are
#         read from each.
#
# For the n-th entry (counting from 1) the keys :"fieldUnit.n",
# :"fieldDescription.n" and :"fieldLabel.n" are written. @params[:md] is
# created when it is nil.
def parseODataFields(props)
  @params[:md] = {} if @params[:md].nil?

  targets = [['fieldUnit', :Unit], ['fieldDescription', :Description], ['fieldLabel', :Key]]
  props.each_with_index do |prop, index|
    rank = index + 1
    targets.each do |label, source|
      @params[:md]["#{label}.#{rank}".to_sym] = prop[source]
    end
  end
end
parseODataMeta(md) click to toggle source
# File lib/xdata/file_reader.rb, line 481
# Copies table-level OData metadata from md into @params[:md], renaming the
# keys (:Title -> :title, :ShortDescription -> :abstract, :Period ->
# :temporal, :Source -> :publisher, :Frequency -> :accrualPeriodicity, ...).
# @params[:md] is created when it is nil.
def parseODataMeta(md)
  @params[:md] = {} if @params[:md].nil?

  renames = [
    [:title,              :Title],
    [:identifier,         :Identifier],
    [:description,        :Description],
    [:abstract,           :ShortDescription],
    [:modified,           :Modified],
    [:temporal,           :Period],
    [:publisher,          :Source],
    [:accrualPeriodicity, :Frequency],
    [:language,           :Language]
  ]
  renames.each { |target, source| @params[:md][target] = md[source] }
  @params[:md][:language]
end
proces_zipped_dir(d) click to toggle source
# File lib/xdata/file_reader.rb, line 615
# Searches directory d (depth first) for the first readable data file and
# loads it.
#
# d - path of the directory to scan.
#
# Entries starting with "." are skipped. Sub-directories are searched
# recursively. A file is handed to read_json, read_shapefile or read_csv
# according to its extension.
#
# Returns true as soon as one file has been read, false when nothing
# suitable was found.
def proces_zipped_dir(d)
  Dir.foreach(d) do |f|

    next if f =~ /^\./

    entry = File.join(d, f)

    if File.directory?(entry)
      return true if proces_zipped_dir(entry)
      # A directory is never a data file, whatever its name looks like.
      next
    end

    case File.extname(f)
      when /\.(geo)?json/i
        read_json(entry)
        return true
      when /\.shp/i
        read_shapefile(entry)
        return true
      # Grouped alternation: the ungrouped /\.csv|tsv/ meant "\.csv" OR "tsv".
      when /\.(csv|tsv)/i
        read_csv(entry)
        return true
    end
  end
  return false
end
read_csv(path, c = nil) click to toggle source
# File lib/xdata/file_reader.rb, line 385
# Loads delimited text into @content as rows of { properties: {...} } and
# then runs find_geometry.
#
# path - file to read, or nil when the text is passed in c.
# c    - the text itself; overwritten by the file's content when path is set.
#
# Reads @params[:utf8_fixed] (skip encoding detection when truthy) and
# @params[:colsep]; sets @params[:format] and, when not given,
# @params[:colsep].
#
# Raises XData::Exception when reading a row fails.
def read_csv(path, c = nil)
  if path 
    File.open(path, "r:bom|utf-8") do |fd|
      c = fd.read
    end
  end

  # Re-encode to UTF-8 from whatever encoding CharlockHolmes detects.
  unless @params[:utf8_fixed]
    detect = CharlockHolmes::EncodingDetector.detect(c)
    c =     CharlockHolmes::Converter.convert(c, detect[:encoding], 'UTF-8') if detect
  end
  c = c.force_encoding('utf-8')
  # Normalise CRLF and bare CR line endings to LF.
  c = c.gsub(/\r\n?/, "\n")
  @content = []
  @params[:format] = 'CSV'
  @params[:colsep] = find_col_sep(StringIO.new(c)) unless @params[:colsep]
  csv = CSV.new(c, :col_sep => @params[:colsep], :headers => true, :skip_blanks =>true)
  # Blank headers become '_'; whitespace runs inside a header become '_'.
  csv.header_convert { |h| h.blank? ? '_' : h.strip.gsub(/\s+/,'_')  }
  # Cell values are stripped; missing cells become ''.
  csv.convert { |h| h ? h.strip : '' }
  # Number of rows stored so far; reported in the error message below.
  index = 0
  begin
    csv.each do |row|
      r = row.to_hash
      h = {}
      r.each do |k,v|
        # Symbolise the header when possible, otherwise keep the key as is.
        h[(k.to_sym rescue k) || k] = v
      end
      @content << {properties: h }
      index += 1
    end
  rescue => e
    raise XData::Exception.new("Read CSV; line #{index}; #{e.message}")
  end
  find_geometry
end
read_json(path, hash=nil) click to toggle source
# File lib/xdata/file_reader.rb, line 421
# Loads JSON into @content.
#
# path - file to read and parse, or nil when parsed data is passed in hash.
# hash - already parsed JSON (Hash or Array) with symbol keys.
#
# Three shapes are recognised:
#   * an OData service document (has :'odata.metadata') -> read_odata;
#   * a GeoJSON FeatureCollection -> each feature, minus its :type, becomes
#     a row; @params[:format] and @params[:hasgeometry] are set to 'GeoJSON';
#   * anything else is free-form JSON: a top-level Array, or the longest
#     Array value of a top-level Hash, supplies the rows, after which
#     find_geometry runs. Other values produce no rows.
def read_json(path, hash=nil)
  @content = []
  if path
    data = File.open(path, "r:bom|utf-8") { |fd| fd.read }
    hash = XData::parse_json(data)
  end

  if hash.is_a?(Hash) && hash[:'odata.metadata']
    read_odata(hash)
  elsif hash.is_a?(Hash) && hash[:type] == 'FeatureCollection'
    # GeoJSON; a collection without :features simply has no rows.
    (hash[:features] || []).each do |f|
      f.delete(:type)
      @content << f
    end
    @params[:hasgeometry] = @params[:format] = 'GeoJSON'

  else
    # Free-form JSON
    @params[:format] = 'JSON'
    rows = nil
    if hash.is_a?(Array)
      # one big array
      rows = hash
    elsif hash.is_a?(Hash)
      # the longest non-empty array value in the object; first wins on ties
      longest = 0
      hash.each_value do |v|
        next unless v.is_a?(Array) && v.length > longest
        rows, longest = v, v.length
      end
    end

    rows.each { |h| @content << { :properties => h } } if rows
    find_geometry
  end
end
read_odata(h) click to toggle source
# File lib/xdata/file_reader.rb, line 505
# Loads an OData service: metadata, field descriptions and the data rows.
#
# h - parsed service document; h[:value] lists the available resources as
#     hashes with :name and :url.
#
# The resource URLs are stored in @params[:odata]. TableInfos feeds
# parseODataMeta, DataProperties feeds parseODataFields and TypedDataSet
# supplies the rows. An HTTP error on any of these requests is printed to
# STDERR and whatever was loaded so far is kept. find_geometry runs last.
#
# The URLs come from the remote service, so they are opened through
# URI#open (open-uri) rather than Kernel#open, which would run a string
# starting with "|" as a command.
def read_odata(h)
  @content = []
  @params[:format] = "OData"
  @params[:odata] = {}
  h[:value].each do |link|
    @params[:odata][link[:name].to_sym] = link[:url]
  end

  begin
    URI.parse(odata_json(@params[:odata][:TableInfos])).open do |f|
      md = XData::parse_json(f.read)[:value]
      parseODataMeta(md[0])
    end

    URI.parse(odata_json(@params[:odata][:DataProperties])).open do |f|
      props = XData::parse_json(f.read)[:value]
      parseODataFields(props)
    end

    URI.parse(odata_json(@params[:odata][:TypedDataSet])).open do |f|
      rows = XData::parse_json(f.read)[:value]
      rows.each do |row|
        @content << { :properties => row }
      end
    end

  rescue OpenURI::HTTPError => e
    STDERR.puts e.message
  end

  find_geometry
end
read_shapefile(path) click to toggle source
# File lib/xdata/file_reader.rb, line 540
# Loads an ESRI shapefile into @content.
#
# path - path of the .shp file.
#
# When @params[:srid] is not set and a .prj file sits next to the shapefile,
# its text is passed to srid_from_prj. Each shape becomes a row with
# :geometry (the shape's geometry converted through JSON) and :properties
# (one entry per shapefile field, keyed by symbol).
def read_shapefile(path)

  @content = []

  prj_path = path.sub(/\.shp\z/i, '') + '.prj'
  # File.exist? - the File.exists? alias was removed in Ruby 3.2.
  prj = File.exist?(prj_path) ? File.read(prj_path) : nil
  srid_from_prj(prj) if prj && @params[:srid].nil?

  @params[:hasgeometry] = 'ESRI Shape'
  @params[:format] = "Shape File"

  GeoRuby::Shp4r::ShpFile.open(path) do |shp|
    shp.each do |shape|
      h = {}
      h[:geometry] = XData::parse_json(shape.geometry.to_json) #a GeoRuby SimpleFeature
      h[:properties] = {}
      att_data = shape.data #a Hash
      shp.fields.each do |field|
        s = att_data[field.name]
        # String attributes are tagged as ISO-8859-1 (bytes are not changed).
        s = s.force_encoding('ISO8859-1') if s.is_a?(String)
        h[:properties][field.name.to_sym] = s
      end
      @content << h
    end
  end
end
read_xdata(path) click to toggle source
# File lib/xdata/file_reader.rb, line 608
# Restores a reader state previously saved by write.
#
# path - path of the .xdata file; it must hold a marshalled Hash with
#        :config (becomes @params) and :content (becomes @content).
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only use
# this on .xdata files from a trusted source.
def read_xdata(path)
  config, content = Marshal.load(File.read(path)).values_at(:config, :content)
  @params = config
  @content = content
end
read_xml(path, data=nil) click to toggle source
# File lib/xdata/file_reader.rb, line 567
# Reads an Atom feed and derives @params[:bounds] from its georss polygons.
#
# path - file to read, or nil when the XML text is passed in data.
# data - the XML text; overwritten by the file's content when path is set.
#
# When Feedjira parses the feed, @params[:format] is set to "Atom Feed" and
# the extent of all //georss:polygon elements (space separated "lat lon"
# pairs) is stored in @params[:bounds]. @content is not touched.
#
# Returns -1 when parsing raises; the error is printed to stdout.
def read_xml(path, data=nil)
  if path
    data = File.open(path, "r:bom|utf-8") { |fd| fd.read }
  end
  begin
    feed = Feedjira::Feed.parse(data)
    if feed
      @params[:format] = "Atom Feed"
      maxlat = -1000
      maxlon = -1000
      minlat = 1000
      minlon = 1000
      doc = Nokogiri::XML data
      polygons = doc.xpath("//georss:polygon")
      if polygons.length > 0
        polygons.each do |node|
          # 50.6 3.1 50.6 7.3 53.7 7.3 53.7 3.1 50.6 3.1
          # Argument-less split ignores leading whitespace, which would
          # otherwise shift every pair by one token.
          node.text.split.each_slice(2) do |lat, lon|
            next if lon.nil? # dangling value without a partner
            maxlat = [maxlat,lat.to_f].max
            maxlon = [maxlon,lon.to_f].max
            minlat = [minlat,lat.to_f].min
            minlon = [minlon,lon.to_f].min
          end
        end
        # NOTE(review): GeoJSON polygons nest rings one level deeper
        # ([[[x,y],...]]); shape kept as before - confirm what consumers expect.
        @params[:bounds] = { type: 'Polygon', coordinates: [[minlon,minlat], [minlon,maxlat], [maxlon,maxlat], [maxlon,minlat], [minlon,minlat]] }
      end
    end
  rescue StandardError => e
    # StandardError only, so interrupts and exit requests still propagate.
    puts e.inspect
    return -1
  end
end
read_zip(path, data=nil) click to toggle source
# File lib/xdata/file_reader.rb, line 639
# Unpacks a zip archive into a temporary directory and loads the first
# readable data file found in it (see proces_zipped_dir).
#
# path - path of the archive, or nil when its bytes are passed in data.
# data - archive content; written to a temporary file that then serves as
#        path.
#
# Requires the external `unzip` program.
#
# Raises XData::Exception when unzipping fails, when any other error occurs,
# or when the archive holds nothing that can be read.
def read_zip(path, data=nil)
  tempfile = nil
  begin

    if data
      tempfile = Tempfile.new('xdatazip')
      tempfile.binmode
      tempfile.write(data)
      # Close to flush everything to disk before unzip opens the file; the
      # file itself stays until unlink below.
      tempfile.close
      path = tempfile.path
    end

    Dir.mktmpdir("xdfi_#{File.basename(path)}") do |dir|
      # Argument list instead of a shell string: quotes or other shell
      # metacharacters in path cannot change the command.
      unzipped = system('unzip', path, '-d', dir, :out => File::NULL, :err => File::NULL)
      raise XData::Exception.new("Error unzipping #{path}.", {:originalfile => path}, __FILE__, __LINE__) unless unzipped
      # Archives often contain a single folder named after the archive.
      inner = File.join(dir, File.basename(path).chomp(File.extname(path)))
      dir = inner if File.directory?(inner)
      return if proces_zipped_dir(dir)
    end
  rescue XData::Exception
    raise
  rescue StandardError => e
    raise XData::Exception.new(e.message, {:originalfile => path}, __FILE__, __LINE__)
  ensure
    tempfile.unlink if tempfile
  end
  raise XData::Exception.new("Could not process file #{path}", {:originalfile => path}, __FILE__, __LINE__)
end
srid_from_prj(str) click to toggle source
# File lib/xdata/file_reader.rb, line 469
# Asks the prj2epsg.org web service which EPSG code belongs to the given
# projection text and stores it in @params[:srid].
#
# str - content of a .prj file (sent as the `terms` parameter, mode 'wkt').
#
# Best effort: a non-2xx response leaves @params untouched and any error is
# swallowed, in both cases returning nil.
def srid_from_prj(str)
  client = Faraday.new :url => "http://prj2epsg.org"
  reply = client.get('/search.json', {:mode => 'wkt', :terms => str})
  return unless reply.status.between?(200, 299)

  parsed = XData::parse_json reply.body
  @params[:srid] = parsed[:codes][0][:code].to_i
rescue
  nil
end
write(path=nil) click to toggle source
# File lib/xdata/file_reader.rb, line 665
# Saves @params and @content as a marshalled Hash ({:config, :content}) so
# that read_xdata can restore them.
#
# path - target path without extension; ".xdata" is appended. Defaults to
#        @file_path or, when that is not set, to @params[:file_path].
#
# Returns the path written, or nil when no path is known or writing failed.
def write(path=nil)
  # @file_path is not assigned by the reader itself, so fall back to the
  # path the data was loaded from instead of failing on nil.
  path = @file_path || (@params && @params[:file_path]) if path.nil?
  return nil if path.nil?

  path = path + '.xdata'
  begin
    File.open(path,"w") do |fd|
      fd.write( Marshal.dump({:config=>@params, :content=>@content}) )
    end
  rescue
    return nil
  end
  return path
end