# Methods added to this helper will be available to all templates in the application. module ApplicationHelper CLUSTERING_NONE = 0 CLUSTERING_GRID = 1 CLUSTERING_CENTERED_GRID = 2 CLUSTERING_ADJUSTED_GRID = 3 CLUSTERING_PROXIMITY = 4 # Classes for doing clustering by nearest neighbor on a 2-D plane # No account is taken for the earth's curvature or ellipsoid shape, # as we assume data will be presented with a Mercator projection require 'bsearch' # code for choosing the closest pair of points from # http://en.wikipedia.org/wiki/Closest_pair_of_points_problem # no doubt much faster if this is all done in C # using the Ruby API # LocatablePoints are thin wrappers around the incoming # array of points, which are expected to have latitude and longitude # properties. # Use duck typing to give Cluster and LocatablePoint objects methods # with the same names -- x, y, is_cluster? class ClosestPoint def find_pair(arr, max_dist=1.0e20) # precondition: items in arr are sorted by x @dmin = max_dist @pts = [nil, nil] find_closest_points(arr, 0, arr.size) return [@dmin, @pts] end def f_equal(a, b) return (1 - a/b).abs < 0.005 # 1/10,000 end def add_one_del_two(points, new_node, a, b) # a.x <= new_node.x <= b.x # find a and b, figure where new_node should go, # insert it there, and then remove a and b from the array # faster way: # points[idx1:d] = points[idx+1:d+1] # points[d+1] = new_node # del points{b} # First find a, allow for other nodes with same x value ax = a.x i = points.bsearch_first{|pt| pt.x <=> ax} while points[i] != a i += 1 end # Now shift things over to the left until we find the space # where c should get placed cx = new_node.x while points[i + 1].x < cx # if true, points[i + 1] can't be b, because a.x <= cx <= b.x points[i] = points[i + 1] i += 1 end points[i] = new_node # Now just walk over to find b -- it shouldn't be too far from here # We don't even need to look up b.x, because we know it's not far, # unless most of the points are in a thin vertical slice between # a and b -- and we'll spend most of our time coalescing them anyway n = points.size loop do i += 1 if i >= n # This shouldn't happen $stderr.puts "Internal error -- node b (#{b} not found)" return end if points[i] == b points.delete_at(i) return elsif points[i].x > b.x $stderr.puts "Internal error -- looking for b at x=#{b.x}, saw pt #{i}:#{points[i].x}" return end end end private def dist(a, b) return (((a.x - b.x) * (a.x - b.x)) + ((a.y - b.y) * (a.y - b.y))) ** 0.5 end def compare(a, b) d = dist(a, b) if d < @dmin @dmin = d @pts = [a, b] end end def compare_y(a, b) if ((a.y - b.y).abs < @dmin) compare(a, b) end end def find_closest_points(arr, left, right) # right points one past the end of the sub-array in arr to compare n = right - left if n <= 1 return elsif n == 2 # Only need to compare the vertical dist -- sorting covers horiz compare_y(arr[left], arr[left + 1]) return elsif n == 3 a, b, c = arr[left .. left + 2] compare_y(a, b) compare_y(b, c) compare_y(a, c) return end mid = (left + right + 1) / 2 find_closest_points(arr, left, mid) return if @dist == 0 # Done find_closest_points(arr, mid, right) return if @dist == 0 find_crossing_nbrs(arr, left, mid, right) end def find_crossing_nbrs(arr, left, mid, right) start_left = left end_right = right - 1 mid_x = arr[mid].x # Avoid changing range to array, then reversing, then each'ing # was (left .. mid - 1).to_a.reverse.each{|i| ... } i = mid - 1 while i >= left if mid_x - arr[i].x > @dmin start_left = i + 1 break end i -= 1 end if start_left == mid # Nothing in the left side is close enough to the right side return end # We always have at least arr[mid] to check (mid + 1 .. right - 1).each do |i| if arr[i].x - mid_x > @dmin end_right = i - 1 break end end # Unroll the trivial 1 x 1 loop -- this is a worthwhile optimization if start_left + 1 == end_right a = arr[start_left] b = arr[end_right] if b.x - a.x <= @dmin compare_y(a, b) end return end # What if we check all pairs -- don't bother copying or sorting? # Either @dmin is relatively big and there won't be many points, # or it's small but we can narrow down the range. # For Ruby doing this reduces total time by about 10-15 % (start_left .. mid - 1).each do |i| (mid .. end_right).each do |j| a = arr[i] b = arr[j] if b.x - a.x <= @dmin compare_y(a, b) else break # no point continuing on right side end end end end # end function end class LocatablePoint attr_reader :point, :x, :y def initialize(point) @point = point @x = point[:longitude] @y = point[:latitude] end def is_cluster? false end end class Cluster attr_accessor :x, :y, :members def initialize(a, b) ca = a.is_cluster? cb = b.is_cluster? if ca || cb if ca @members = a.members @x = a.x @y = a.y # Don't destroy the original nodes if cb self.take_members(b) else self.add_member(b) end else @members = b.members @x = b.x @y = b.y self.add_member(a) end else # a and b must both be LocatablePoints @members = [a, b] @y = (a.y + b.y) / 2 @x = (a.x + b.x) / 2 end end def is_cluster? true end def add_member(p) size = @members.size @y = (size * @y + p.y) / (size + 1) @x = (size * @x + p.x) / (size + 1) @members << p end def take_members(c) size1 = @members.size size2 = c.members.size size3 = (size1 + size2) @y = (size1 * @y + size2 * c.y) / size3 @x = (size1 * @x + size2 * c.x) / size3 @members += c.members end end def cluster_by_distance(points, target, ne, sw) # points: an array of objects that have a latitude and longitude attributes # nw & sw: bound the view in (lat, long) pairs n = points.size width = (ne[0] - sw[0]).abs # abs to allow for wraparound height = ne[1] - sw[1]; dist_factor = 1.0 / 15 # 1/20 is too crowded max_accept_dist = (height * height + width * width) ** 0.5 * dist_factor t1 = Time.now points = points.map{|p| LocatablePoint.new(p) }.sort{|a,b| a.x <=> b.x } cp = ClosestPoint.new n = points.size num_points = n num_iters = 0 while num_points > target num_iters += 1 dist, pts = cp.find_pair(points) if dist > max_accept_dist puts "couldn't find a pair closer than max_accept_dist=#{max_accept_dist}" puts "ratio=#{max_accept_dist/height}, dist=#{dist}" break end a, b = pts c = Cluster.new(a, b) # Watch out for rounding errors if (c.x < a.x) c.x = a.x elsif c.x > b.x c.x = b.x end cp.add_one_del_two(points, c, a, b) num_points -= 1 end t2 = Time.now puts "#{num_iters} iters, #{n} points, actions:" dt = (t2 - t1) puts "==> #{points.size} items, elapsed time = #{dt} msecs." return points end # function end