class Geocoder::US::Database

Provides an interface to a Geocoder::US database.

Constants

City_Weight
Number_Weight
Parity_Weight
Street_Weight

Public Class Methods

new(filename, options = {}) click to toggle source

Takes the path of an SQLite 3 database prepared for Geocoder::US as the sole mandatory argument. The helper argument points to the Geocoder::US SQLite plugin; the module looks for this in the same directory as database.rb by default. The cache_size argument is measured in kilobytes and is used to set the SQLite cache size; larger values will trade memory for speed in long-running processes.

# File lib/geocoder/us/database.rb, line 30
def initialize (filename, options = {})
  defaults = {:debug => false, :cache_size => 50000,
              :helper => "sqlite3.so", :threadsafe => false,
              :create => false}
  options = defaults.merge options
  raise ArgumentError, "can't find database #{filename}" \
    unless options[:create] or File.exists? filename
  @db = SQLite3::Database.new( filename )
  @st = {}
  @debug = options[:debug]
  @threadsafe = options[:threadsafe]
  tune options[:helper], options[:cache_size]
end

Public Instance Methods

add_ranges!(address, candidates) click to toggle source
# File lib/geocoder/us/database.rb, line 379
def add_ranges! (address, candidates)
  number = address.number.to_i
  fids   = unique_values candidates, :fid
  ranges = ranges_by_feature fids, number, address.prenum
  ranges = ranges_by_feature fids, number, nil unless !ranges.empty?
  merge_rows! candidates, ranges, :fid
  assign_number! number, candidates
end
assign_number!(hn, candidates) click to toggle source

Given a query hash and a list of candidates, assign :number and :precision values to each candidate. If the query building number is inside the candidate range, set the number on the result and set the precision to :range; otherwise, find the closest corner and set precision to :street.

# File lib/geocoder/us/database.rb, line 364
def assign_number! (hn, candidates)
  hn = 0 unless hn
  for candidate in candidates
    fromhn, tohn = candidate[:fromhn].to_i, candidate[:tohn].to_i
    if (hn >= fromhn and hn <= tohn) or (hn <= fromhn and hn >= tohn)
      candidate[:number] = hn.to_s
      candidate[:precision] = :range
    else
      candidate[:number] = ((hn - fromhn).abs < (hn - tohn).abs ?
                            candidate[:fromhn] : candidate[:tohn]).to_s
      candidate[:precision] = :street
    end
  end
end
best_candidates!(candidates) click to toggle source

Find the candidates in a list of candidates that are tied for the top score and prune the remainder from the list.

# File lib/geocoder/us/database.rb, line 467
def best_candidates! (candidates)
  candidates.sort! {|a,b| b[:score] <=> a[:score]}
  #candidates.reverse_each {|c| print "#{c[:number]} #{c[:state]} #{c[:city]} #{c[:raw_score]} #{c[:number_score]} #{c[:street_score]} #{c[:city_score]}\n" }
  candidates.delete_if {|record| record[:score] < candidates[0][:score]}
end
best_places(address, places, canonicalize=false) click to toggle source
# File lib/geocoder/us/database.rb, line 590
def best_places (address, places, canonicalize=false)
  return [] unless !places.empty?
  score_candidates! address, places
  best_candidates! places 
  canonicalize_places! places if canonicalize

  # uniqify places
  by_name = rows_to_h(places, :city, :state)
  if !by_name.nil?
    begin
      by_name.values.each {|v| 
         v.sort! {|a,b|
           a[:zip] <=> b[:zip]
         }}
        rescue

        end
  places = by_name.map {|k,v| v[0]}
   
  places.each {|record| clean_record! record}
  places.each {|record|
    record[:precision] = (record[:zip] == address.zip ? :zip : :city)
  }
  end
  places
end
canonicalize_places!(candidates) click to toggle source

Find and replace the city, state, and county information in a list of candidates with the primary place information for the ZIP codes in the candidate list.

# File lib/geocoder/us/database.rb, line 557
def canonicalize_places! (candidates)
  zips_used  = unique_values(candidates, :zip)
  pri_places = rows_to_h primary_places(zips_used), :zip
  candidates.map! {|record|
    current_places = pri_places[[record[:zip]]]
    # FIXME: this should never happen!
    return [] unless current_places
    top_priority = current_places.map{|p| p[:priority]}.min
    current_places.select {|p| p[:priority] == top_priority}.map {|p|
      record.merge({
        :city => p[:city], 
        :state => p[:state], 
        :fips_county => p[:fips_county]
      })
    }
  } 
  candidates.flatten!
end
clean_record!(record) click to toggle source

Clean up a candidate record by formatting the score, replacing nil values with empty strings, and deleting artifacts from database queries.

# File lib/geocoder/us/database.rb, line 579
def clean_record! (record)
  record[:score] = format("%.3f", record[:score]).to_f \
    unless record[:score].nil?
  record.keys.each {|k| record[k] = "" if record[k].nil? } # clean up nils
  record.delete :components unless @debug
  record.delete_if {|k,v| k.is_a? Fixnum or
      [:geometry, :side, :tlid, :fid, :fid1, :fid2, :street_phone,
       :city_phone, :fromhn, :tohn, :paflag, :flipped, :street_score,
       :city_score, :priority, :fips_class, :fips_place, :status].include? k}
end
distance(a, b) click to toggle source

Simple Euclidean distances between two 2-D coordinate pairs, scaled along the longitudinal axis by scale_lon.

# File lib/geocoder/us/database.rb, line 524
def distance (a, b)
  dx = (b[0] - a[0]) * scale_lon(a[1], b[1])
  dy = (b[1] - a[1]) 
  Math.sqrt(dx ** 2 + dy ** 2)
end
edges(edge_ids) click to toggle source

Query the edge table for a list of edges matching a list of edge IDs.

# File lib/geocoder/us/database.rb, line 236
def edges (edge_ids)
  in_list = placeholders_for edge_ids
  sql = "SELECT edge.* FROM edge WHERE edge.tlid IN (#{in_list})"
  execute sql, *edge_ids
end
execute(sql, *params) click to toggle source

Execute an SQL statement, bind a list of parameters, and return the result as a list of hashes.

# File lib/geocoder/us/database.rb, line 120
def execute (sql, *params)
  st = prepare(sql) 
  execute_statement st, *params
end
execute_statement(st, *params) click to toggle source

Execute an SQLite statement object, bind the parameters, map the column names to symbols, and return the rows as a list of hashes.

# File lib/geocoder/us/database.rb, line 129
def execute_statement (st, *params)
  if @debug
    start = Time.now
    $stderr.print "EXEC: #{params.inspect}\n" if !params.empty?
  end
  rows = []
  synchronize do
    result = st.execute(*params)
    columns = result.columns.map {|c| c.to_sym}
    result.each {|row|
       rows << Hash[*(columns.zip(row).flatten)]}
    
  end
  if @debug
    runtime = format("%.3f", Time.now - start)
    $stderr.print "ROWS: #{rows.length} (#{runtime}s)\n"
  end
  rows.reverse!
end
extend_ranges!(candidates) click to toggle source
# File lib/geocoder/us/database.rb, line 396
def extend_ranges! (candidates)
  edge_ids    = merge_edges! candidates
  full_ranges = range_ends edge_ids
  merge_rows! candidates, full_ranges, :tlid, :side
end
features_by_street(street, tokens) click to toggle source

Generate an SQL query and set of parameters against the feature and range tables for a street name and optional building number. The SQL is used by candidate_records and more_candidate_records to filter results by ZIP code.

# File lib/geocoder/us/database.rb, line 177
def features_by_street (street, tokens)
  metaphones = (["metaphone(?,5)"] * tokens.length).join(",")
  sql = "
    SELECT feature.*, levenshtein(?, street) AS street_score
      FROM feature
      WHERE street_phone IN (#{metaphones})"
  params = [street] + tokens
  return [sql, params]
end
features_by_street_and_zip(street, tokens, zips) click to toggle source

Query the feature and range tables for a set of ranges, given a building number, street name, and list of candidate ZIP codes. The metaphone and ZIP code indexes on the feature table are used to match results.

# File lib/geocoder/us/database.rb, line 191
def features_by_street_and_zip (street, tokens, zips)
  sql, params = features_by_street(street, tokens)
  in_list = placeholders_for zips
  sql    += " AND feature.zip IN (#{in_list})"
  params += zips
  execute sql, *params
end
find_candidates(address) click to toggle source
# File lib/geocoder/us/database.rb, line 332
def find_candidates (address)
  places = []
  candidates = []

  city = address.city.sort {|a,b|a.length <=> b.length}[0]
  if(!address.zip.empty? && !address.zip.nil?)
     places = places_by_zip city, address.zip 
  end
  places = places_by_city city, address.city_parts, address.state if places.empty?
  return [] if places.empty?

  address.city = unique_values places, :city
  return places if address.street.empty?

  zips = unique_values places, :zip
  street = address.street.sort {|a,b|a.length <=> b.length}[0]
 # puts "street parts = #{address.street_parts.inspect}"
  candidates = features_by_street_and_zip street, address.street_parts, zips

  if candidates.empty?
    candidates = more_features_by_street_and_zip street, address.street_parts, zips
  end

  merge_rows! candidates, places, :zip
  candidates
end
flush_statements() click to toggle source
# File lib/geocoder/us/database.rb, line 104
def flush_statements
  @st = {}
end
geocode(info_to_geocode, canonical_place=false) click to toggle source

Geocode a given address or place name string. The max_penalty and cutoff arguments are passed to the Address parse functions. If canonicalize is true, attempt to return the “primary” street and place names, if they are different from the ones given.

Returns possible candidate matches as a list of hashes.

  • The :lat and :lon values of each hash store the range-interpolated address coordinates as latitude and longitude in the WGS84 spheroid.

  • The :precision value may be one of :city, :zip, :street, or :range, in order of increasing precision.

  • The :score value will be a float between 0.0 and 1.0 representing the approximate “goodness” of the candidate match.

  • The other values in the hash will represent various structured components of the address and place name.

# File lib/geocoder/us/database.rb, line 719
def geocode (info_to_geocode, canonical_place=false)
  address = Address.new info_to_geocode
  $stderr.print "ADDR: #{address.inspect}\n" if @debug
  return [] if address.city.empty? and address.zip.empty?
  results = []
  start_time = Time.now if @debug
  if address.po_box? and !address.zip.empty?
    results = geocode_place address, canonical_place
  end
  if address.intersection? and !address.street.empty? and address.number.empty?
    results = geocode_intersection address, canonical_place
  end
  if results.empty? and !address.street.empty?
    results = geocode_address address, canonical_place
  end
  if results.empty?
    results = geocode_place address, canonical_place
  end
  if @debug
    runtime = format("%.3f", Time.now - start_time)
    $stderr.print "DONE: #{runtime}s\n"
  end
  results
end
geocode_address(address, canonical_place=false) click to toggle source

Given an Address object, return a list of possible geocodes by address range interpolation. If canonicalize is true, attempt to return the “primary” street and place names, if they are different from the ones given.

# File lib/geocoder/us/database.rb, line 661
def geocode_address (address, canonical_place=false)
  candidates = find_candidates address
  return [] if candidates.empty?
  return best_places(address, candidates, canonical_place) if candidates[0][:street].nil?

  score_candidates! address, candidates
  best_candidates! candidates 

  #candidates.sort {|a,b| b[:score] <=> a[:score]}.each {|candidate|
  add_ranges! address, candidates
  score_candidates! address, candidates
  #pp candidates.sort {|a,b| b[:score] <=> a[:score]}
  best_candidates! candidates 

  # sometimes multiple fids match the same tlid
  by_tlid = rows_to_h candidates, :tlid
  candidates = by_tlid.values.map {|records| records[0]}

  # if no number is assigned in the query, only return one
  # result for each street/zip combo
  if !address.number.empty?
    extend_ranges! candidates
  else
    by_street = rows_to_h candidates, :street, :zip
    candidates = by_street.values.map {|records| records[0]}
    merge_edges! candidates
  end
  candidates.map {|record|
    dist = interpolation_distance record
    $stderr.print "DIST: #{dist}\n" if @debug
    points = unpack_geometry record[:geometry]
    points.reverse! if record[:flipped]
    record[:lon], record[:lat] = interpolate points, dist
  }
  
  canonicalize_places! candidates if canonical_place

  candidates.each {|record| clean_record! record}
  candidates
end
geocode_intersection(address, canonical_place=false) click to toggle source
# File lib/geocoder/us/database.rb, line 627
def geocode_intersection (address, canonical_place=false)
  candidates = find_candidates address
  return [] if candidates.empty?
  return best_places(address, candidates, canonical_place) if candidates[0][:street].nil?

  features = rows_to_h candidates, :fid
  intersects = intersections_by_fid features.keys.flatten
  intersects.map! {|record|
    feat1, feat2 = record.values_at(:fid1, :fid2).map {|k| features[[k]][0]}
    record.merge! feat1
    record[:street1] = record.delete(:street)
    record[:street2] = feat2[:street]
    record[:lon], record[:lat] = unpack_geometry(record.delete(:point))[0]
    record[:precision] = :intersection
    record[:street_score] = (feat1[:street_score].to_f + feat2[:street_score].to_f)/2
    record
  }
  #pp(intersects)
  
  score_candidates! address, intersects
  best_candidates! intersects 

  by_point = rows_to_h(intersects, :lon, :lat)
  candidates = by_point.values.map {|records| records[0]}

  canonicalize_places! candidates if canonical_place
  candidates.each {|record| clean_record! record}
  candidates
end
geocode_place(address, canonicalize=false) click to toggle source

Given an Address object, return a list of possible geocodes by place name. If canonicalize is true, attempt to return the “primary” postal place name for the given city, state, or ZIP.

# File lib/geocoder/us/database.rb, line 620
def geocode_place (address, canonicalize=false)
  places = []
  places = places_by_zip address.text, address.zip if !address.zip.empty? or !address.zip.nil?
  places = places_by_city address.text, address.city_parts, address.state if places.empty?
  best_places address, places, canonicalize
end
interpolate(points, fraction) click to toggle source

Find an interpolated point along a list of linestring vertices proportional to the given fractional distance along the line.

# File lib/geocoder/us/database.rb, line 532
def interpolate (points, fraction)
  $stderr.print "POINTS: #{points.inspect}" if @debug
  return points[0] if fraction == 0.0 
  return points[-1] if fraction == 1.0 
  total = 0.0
  (1...points.length).each {|n| total += distance(points[n-1], points[n])}
  target = total * fraction
  for n in 1...points.length
    step = distance(points[n-1], points[n])
    if step < target
      target -= step
    else
      scale = scale_lon(points[n][1], points[n-1][1])
      dx = (points[n][0] - points[n-1][0]) * (target/step) * scale
      dy = (points[n][1] - points[n-1][1]) * (target/step)
      found = [points[n-1][0]+dx, points[n-1][1]+dy]
      return found.map {|x| format("%.6f", x).to_f}
    end
  end
 # raise "Can't happen!"
end
interpolation_distance(candidate) click to toggle source

Compute the fractional interpolation distance for a query number along an edge, given all of the ranges for the same side of that edge.

# File lib/geocoder/us/database.rb, line 475
def interpolation_distance (candidate)
  fromhn, tohn, number = candidate.values_at(:fromhn, :tohn, :number).map{|x| x.to_i}
  $stderr.print "NUM : #{fromhn} < #{number} < #{tohn} (flipped? #{candidate[:flipped]})\n" if @debug
  # don't need this anymore since range_ends was improved...
  fromhn, tohn = tohn, fromhn if fromhn > tohn
  if fromhn > number
    0.0
  elsif tohn < number
    1.0
  else
    (number - fromhn) / (tohn - fromhn).to_f
  end
end
intersections_by_fid(fids) click to toggle source
# File lib/geocoder/us/database.rb, line 265
def intersections_by_fid (fids)
  in_list = placeholders_for fids
  sql = "
    CREATE TEMPORARY TABLE intersection AS
      SELECT fid, substr(geometry,1,8) AS point
          FROM feature_edge, edge 
          WHERE feature_edge.tlid = edge.tlid
          AND fid IN (#{in_list})
      UNION
      SELECT fid, substr(geometry,length(geometry)-7,8) AS point
          FROM feature_edge, edge 
          WHERE feature_edge.tlid = edge.tlid
          AND fid IN (#{in_list});
    CREATE INDEX intersect_pt_idx ON intersection (point);"
  execute sql, *(fids + fids)
  # the a.fid < b.fid inequality guarantees consistent ordering of street
  # names in the output
  results = execute "
    SELECT a.fid AS fid1, b.fid AS fid2, a.point 
        FROM intersection a, intersection b, feature f1, feature f2
        WHERE a.point = b.point AND a.fid < b.fid
        AND f1.fid = a.fid AND f2.fid = b.fid
        AND f1.zip = f2.zip
        AND f1.paflag = 'P' AND f2.paflag = 'P';"
  execute "DROP TABLE intersection;"
  flush_statements # the CREATE/DROP TABLE invalidates prepared statements
  results
end
merge_edges!(candidates) click to toggle source
# File lib/geocoder/us/database.rb, line 388
def merge_edges! (candidates)
  edge_ids = unique_values candidates, :tlid
  records  = edges edge_ids
  merge_rows! candidates, records, :tlid
  candidates.reject! {|record| record[:tlid].nil?}
  edge_ids
end
merge_rows!(dest, src, *keys) click to toggle source

Merge the values in the list of rows given in src into the list of rows in dest, matching rows on the given list of keys. May generate more than one row in dest for each input dest row.

# File lib/geocoder/us/database.rb, line 319
def merge_rows! (dest, src, *keys)
  src = rows_to_h src, *keys
  dest.map! {|row|
    vals = row.values_at(*keys)
    if src.key? vals
      src[vals].map {|row2| row.merge row2}
    else
      [row]
    end
  }
  dest.flatten!
end
metaphone_placeholders_for(list) click to toggle source

Generate enough SQL placeholders for a list of objects.

# File lib/geocoder/us/database.rb, line 114
def metaphone_placeholders_for (list)
  (["metaphone(?,5)"] * list.length).join(",")
end
more_features_by_street_and_zip(street, tokens, zips) click to toggle source

Query the feature and range tables for a set of ranges, given a building number, street name, and list of candidate ZIP codes. The ZIP codes are reduced to a set of 3-digit prefixes, broadening the search area.

# File lib/geocoder/us/database.rb, line 203
def more_features_by_street_and_zip (street, tokens, zips)
  sql, params = features_by_street(street, tokens)
  if !zips.empty? and !zips[0].nil?
    zip3s = zips.map {|z| z[0..2]+'%'}.to_set.to_a
    like_list = zip3s.map {|z| "feature.zip LIKE ?"}.join(" OR ")
    sql += " AND (#{like_list})"
    params += zip3s
  end
  st = @db.prepare sql
  execute_statement st, *params
end
placeholders_for(list) click to toggle source

Generate enough SQL placeholders for a list of objects.

# File lib/geocoder/us/database.rb, line 109
def placeholders_for (list)
  (["?"] * list.length).join(",")
end
places_by_city(city, tokens, state) click to toggle source

Query the place table for by city, optional state, and zip. The metaphone index on the place table is used to match city names.

# File lib/geocoder/us/database.rb, line 157
def places_by_city (city, tokens, state)
  if city.nil?
    city = ""
  end
  if state.nil? or state.empty?
    and_state = ""
    args = [city] + tokens.clone
  else
    and_state = "AND state = ?"
    args = [city] + tokens.clone + [state]
  end
  metaphones = metaphone_placeholders_for tokens
  execute("SELECT *, levenshtein(?, city) AS city_score
            FROM place WHERE city_phone IN (#{metaphones}) #{and_state} order by priority desc LIMIT 1;", *args)
end
places_by_zip(city, zip) click to toggle source
# File lib/geocoder/us/database.rb, line 149
def places_by_zip (city, zip)
  execute("SELECT *, levenshtein(?, city) AS city_score
           FROM place WHERE zip = ? order by priority desc LIMIT 1;", city, zip)
end
prepare(sql) click to toggle source

Return a cached SQLite statement object, preparing it first if it’s not already in the cache.

# File lib/geocoder/us/database.rb, line 96
def prepare (sql)
  $stderr.print "SQL : #{sql}\n" if @debug
  synchronize do
    @st[sql] ||= @db.prepare sql
  end
  return @st[sql]
end
primary_places(zips) click to toggle source

Query the place table for notional “primary” place names for each of a list of ZIP codes. Since the place table shipped with this code is bespoke, and constructed from a variety of public domain sources, the primary name for a ZIP is not always the “right” one.

# File lib/geocoder/us/database.rb, line 298
def primary_places (zips)
  in_list = placeholders_for zips
  sql = "SELECT * FROM place WHERE zip IN (#{in_list}) order by priority desc;"
  execute sql, *zips
end
range_ends(edge_ids) click to toggle source

Query the range table for all ranges associated with the given list of edge IDs.

# File lib/geocoder/us/database.rb, line 244
def range_ends (edge_ids)
  in_list = placeholders_for edge_ids
  sql = "SELECT tlid, side,
                min(fromhn) > min(tohn) AS flipped,
                min(fromhn) AS from0, max(tohn)   AS to0,
                min(tohn)   AS from1, max(fromhn) AS to1
          FROM range WHERE tlid IN (#{in_list})
          GROUP BY tlid, side;"
  execute(sql, *edge_ids).map {|r|
    if r[:flipped] == "0"
      r[:flipped] = false
      r[:fromhn], r[:tohn] = r[:from0], r[:to0]
    else
      r[:flipped] = true
      r[:fromhn], r[:tohn] = r[:from1], r[:to1]
    end
    [:from0, :to0, :from1, :to1].each {|k| r.delete k}
    r
  }
end
ranges_by_feature(fids, number, prenum) click to toggle source
# File lib/geocoder/us/database.rb, line 215
def ranges_by_feature (fids, number, prenum)
  in_list = placeholders_for fids
  limit = 4 * fids.length
  sql = "
    SELECT feature_edge.fid AS fid, range.*
      FROM feature_edge, range
      WHERE fid IN (#{in_list})
      AND feature_edge.tlid = range.tlid"
  params = fids.clone
  unless prenum.nil?
    sql += " AND prenum = ?"
    params += [prenum]
  end
  sql += " 
      ORDER BY min(abs(fromhn - ?), abs(tohn - ?))
      LIMIT #{limit};"
  params += [number, number]
  execute sql, *params
end
rows_to_h(rows, *keys) click to toggle source

Convert a list of rows into a hash keyed by the given keys.

# File lib/geocoder/us/database.rb, line 310
def rows_to_h (rows, *keys)
  hash = {}
  rows.each {|row| (hash[row.values_at(*keys)] ||= []) << row; }
  hash
end
scale_lon(lat1,lat2) click to toggle source

Calculate the longitude scaling for the average of two latitudes.

# File lib/geocoder/us/database.rb, line 514
def scale_lon (lat1,lat2)
  # an approximation in place of lookup.rst (10e) and (10g)
  # = scale longitude distances by the cosine of the latitude
  # (or, actually, the mean of two latitudes)
  # -- is this even necessary?
  Math.cos((lat1+lat2) / 2 * Math::PI / 180)
end
score_candidates!(address, candidates) click to toggle source

Score a list of candidates. For each candidate:

  • For each item in the query:

** if the query item is blank but the candidate is not, score 0.15;

otherwise, if both are blank, score 1.0.

** If both items are set, compute the scaled Levenshtein-Damerau distance

between them, and add that value (between 0.0 and 1.0) to the score.
  • Add 0.5 to the score for each numbered end of the range that matches the parity of the query number.

  • Add 1.0 if the query number is in the candidate range, otherwise add a fractional value for the notional distance between the closest candidate corner and the query.

  • Finally, divide the score by the total number of comparisons. The result should be between 0.0 and 1.0, with 1.0 indicating a perfect match.

# File lib/geocoder/us/database.rb, line 416
def score_candidates! (address, candidates)
  for candidate in candidates
    candidate[:components] = {}
    compare = [:prenum, :state, :zip]
    denominator = compare.length + Street_Weight + City_Weight

    street_score = (1.0 - candidate[:street_score].to_f) * Street_Weight
    candidate[:components][:street] = street_score
    city_score   = (1.0 - candidate[:city_score].to_f) * City_Weight
    candidate[:components][:city] = city_score
    score = street_score + city_score

    compare.each {|key|
      src  = address.send(key); src = src ? src.downcase : ""
      dest = candidate[key]; dest = dest ? dest.downcase : ""
      item_score = (src == dest) ? 1 : 0
      candidate[:components][key] = item_score
      score += item_score
    }
   
    if address.number and !address.number.empty?
      parity = subscore = 0.0
      fromhn, tohn, assigned, hn = [
          candidate[:fromhn], 
          candidate[:tohn], 
          candidate[:number], 
          address.number].map {|s|s.to_i}
      if candidate[:precision] == :range
        subscore += Number_Weight
      elsif assigned > 0
        # only credit number subscore if assigned
        subscore += Number_Weight/(assigned - hn).abs.to_f
      end
      candidate[:components][:number] = subscore
      if hn > 0 and assigned > 0
        # only credit parity if a number was given *and* assigned
        parity += Parity_Weight/2.0 if fromhn % 2 == hn % 2
        parity += Parity_Weight/2.0 if tohn % 2 == hn % 2
      end
      candidate[:components][:parity] = parity
      score += subscore + parity
      denominator += Number_Weight + Parity_Weight
    end
    candidate[:components][:total] = score.to_f
    candidate[:components][:denominator] = denominator
    candidate[:score] = score.to_f / denominator
  end
end
synchronize() { || ... } click to toggle source
# File lib/geocoder/us/database.rb, line 44
def synchronize
  if not @threadsafe
    @@mutex.synchronize { yield }
  else
    yield
  end
end
tune(helper, cache_size) click to toggle source

Load the SQLite extension and tune the database settings. q.v. web.utk.edu/~jplyon/sqlite/SQLite_optimization_FAQ.html

# File lib/geocoder/us/database.rb, line 56
def tune (helper, cache_size)
  synchronize do
    @db.create_function("levenshtein", 2) do |func, word1, word2|
      test1, test2 = [word1, word2].map {|w|
        w.to_s.gsub(/\W/o, "").downcase
      }
      dist = Text::Levenshtein.distance(test1, test2)
      result = dist.to_f / [test1.length, test2.length].max
      func.set_result result 
    end
    @db.create_function("metaphone", 2) do |func, string, len|
      test = string.to_s.gsub(/\W/o, "")
      if test =~ /^(\d+)/o
        mph = $1
      elsif test =~ /^([wy])$/io
        mph = $1
      else
        mph = Text::Metaphone.metaphone test
      end
      func.result = mph[0...len.to_i]
    end
    @db.create_function("nondigit_prefix", 1) do |func, string|
      string.to_s =~ /^(.*\D)?(\d+)$/o
      func.result = ($1 || "")
    end
    @db.create_function("digit_suffix", 1) do |func, string|
      string.to_s =~ /^(.*\D)?(\d+)$/o
      func.result = ($2 || "")
    end
    #@db.enable_load_extension(1)
    #@db.load_extension(helper)
    #@db.enable_load_extension(0)
    @db.cache_size = cache_size
    @db.temp_store = "memory"
    @db.synchronous = "off"
  end
end
unique_values(rows, key) click to toggle source

Given a list of rows, find the unique values for a given key.

# File lib/geocoder/us/database.rb, line 305
def unique_values (rows, key)
  rows.map {|r| r[key]}.to_set.to_a
end
unpack_geometry(geom) click to toggle source

Unpack an array of little-endian 4-byte ints, and convert them into signed floats by dividing by 10^6, inverting the process used by the compress_wkb_line() function in the SQLite helper extension.

# File lib/geocoder/us/database.rb, line 492
def unpack_geometry (geom)
  points = []
  if !geom.nil?
    # Pete - The database format is completely different to the one
    # expected by the code, so I've done some detective work to
    # figure out what it should be. It looks like the format is
    # | 1 byte Type | 4 byte SRID | 4 byte element count| 8 byte double coordinates *
    # I've added new code to read this, and commented out the old.
    info = geom.unpack('CVVD*')
    coords = info.slice(3, info.length)
    points << [coords.shift, coords.shift] until coords.empty?

  #  coords = geom.unpack "V*" # little-endian 4-byte long ints
  #
  ## now map them into signed floats
  #  coords.map! {|i| ( i > (1 << 31) ? i - (1 << 32) : i ) / 1_000_000.0}
  #  points << [coords.shift, coords.shift] until coords.empty?
  end
  points
end