Introduction to Rvoterdistance

Loren Collingwood

Overview

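Rvoterdistance calculates the geographic distance between voters and polling locations (or vote-by-mail drop boxes) using the Haversine great-circle formula, implemented in C++ for speed. The package supports nearest-location lookups, k-nearest queries, distance-threshold searches, sf POINT inputs, and standalone convenience functions (dist_km(), dist_mile(), and haversine()).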

Installation

# From GitHub:
remotes::install_github("lorenc5/Rvoterdistance")

Included Data

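The package ships with two example datasets, both loaded by data(meck_ev): voter_meck (4,552 voter locations) and early_meck (21 early voting sites, with addresses). Each has county, long, and lat columns: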

library(Rvoterdistance)
data(meck_ev)

str(voter_meck)
#> 'data.frame':    4552 obs. of  3 variables:
#>  $ county: chr  "MECKLENBURG" "MECKLENBURG" "MECKLENBURG" "MECKLENBURG" ...
#>  $ long  : num  -80.9 -81 -80.8 -80.8 -80.9 ...
#>  $ lat   : num  35.2 35.1 35.2 35.3 35 ...
str(early_meck)
#> 'data.frame':    21 obs. of  4 variables:
#>  $ county     : chr  "MECKLENBURG" "MECKLENBURG" "MECKLENBURG" "MECKLENBURG" ...
#>  $ office_addr: chr  "BEATTIES FORD LIBRARY 2412 BEATTIES FORD RD" "BETTE RAE THOMAS RECREATION CENTER 2921 TUCKASEEGEE RD" "CORNELIUS TOWN HALL 21445 CATAWBA AVE" "DELTA CENTER 5408 BEATTIES FORD RD" ...
#>  $ long       : num  -80.9 -80.9 -80.9 -80.9 -80.8 ...
#>  $ lat        : num  35.3 35.2 35.5 35.3 35.2 ...

Basic Usage: Nearest Location

The main function is nearest_location(). With the default k = 1, it returns one row per voter with the distance to the nearest polling location:

result <- nearest_location(
  voters    = voter_meck,
  locations = early_meck,
  voter_coords    = c("lat", "long"),
  location_coords = c("lat", "long")
)

head(result)
#>        county      long      lat      county
#> 1 MECKLENBURG -80.92800 35.20503 MECKLENBURG
#> 2 MECKLENBURG -80.99874 35.11030 MECKLENBURG
#> 3 MECKLENBURG -80.81264 35.22413 MECKLENBURG
#> 4 MECKLENBURG -80.79422 35.26237 MECKLENBURG
#> 5 MECKLENBURG -80.87096 35.04406 MECKLENBURG
#> 6 MECKLENBURG -80.78188 35.46774 MECKLENBURG
#>                                 office_addr      long      lat distance_m
#> 1     WEST BOULEVARD LIBRARY 2157 WEST BLVD -80.89657 35.21157  2950.3663
#> 2    STEELE CREEK AREA 11130 SOUTH TRYON ST -80.96072 35.11588  3517.6541
#> 3  MIDWOOD CULTURAL CENTER 1817 CENTRAL AVE -80.80855 35.22020   574.0296
#> 4       SUGAR CREEK LIBRARY 4045 N TRYON ST -80.79749 35.25692   676.0503
#> 5 SOUTH COUNTY REGIONAL LIBRARY 5801 REA RD -80.81186 35.08731  7223.9220
#> 6     CORNELIUS TOWN HALL 21445 CATAWBA AVE -80.85924 35.48172  7184.1009
#>   distance_km distance_miles
#> 1   2.9503663      1.8332772
#> 2   3.5176541      2.1857743
#> 3   0.5740296      0.3566863
#> 4   0.6760503      0.4200792
#> 5   7.2239220      4.4887482
#> 6   7.1841009      4.4640044

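The output includes the voter data, the matched location data, and three distance columns: distance_m (meters), distance_km (kilometers), and distance_miles (miles). Note that column names shared by both inputs (here county, long, and lat) appear twice in the result: first for the voter, then for the matched location.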

k-Nearest Locations

To find the 3 closest early voting sites for each voter:

result_k3 <- nearest_location(
  voter_meck, early_meck,
  voter_coords    = c("lat", "long"),
  location_coords = c("lat", "long"),
  k = 3,
  append_data = FALSE
)

head(result_k3, 9)
#>   voter_id rank location_id distance_m distance_km distance_miles
#> 1        1    1          21  2950.3663   2.9503663      1.8332772
#> 2        1    2           2  6166.9263   6.1669263      3.8319598
#> 3        1    3           9  7871.9153   7.8719153      4.8913935
#> 4        2    1          17  3517.6541   3.5176541      2.1857743
#> 5        2    2           9 13708.7253  13.7087253      8.5182281
#> 6        2    3          21 14613.4911  14.6134911      9.0804250
#> 7        3    1          11   574.0296   0.5740296      0.3566863
#> 8        3    2           5  2261.2717   2.2612717      1.4050926
#> 9        3    3           8  2600.9012   2.6009012      1.6161291

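The output is in long format, with one row per voter-location pair and a rank column (1 = nearest). Because append_data = FALSE, the voter and location data are not attached; each row is identified by its voter_id and location_id instead.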

Distance Threshold

Find all early voting locations within 5 miles of each voter (here, only the first 20 voters, to keep the example small):

result_5mi <- nearest_location(
  voter_meck[1:20, ], early_meck,
  voter_coords    = c("lat", "long"),
  location_coords = c("lat", "long"),
  max_dist = 5,
  units = "miles",
  append_data = FALSE
)

head(result_5mi, 10)
#>    voter_id rank location_id distance_m distance_km distance_miles
#> 1         1    1          21  2950.3663   2.9503663      1.8332772
#> 2         1    2           2  6166.9263   6.1669263      3.8319598
#> 3         1    3           9  7871.9153   7.8719153      4.8913935
#> 4         2    1          17  3517.6541   3.5176541      2.1857743
#> 5         3    1          11   574.0296   0.5740296      0.3566863
#> 6         3    2           5  2261.2717   2.2612717      1.4050926
#> 7         3    3           8  2600.9012   2.6009012      1.6161291
#> 8         3    4          18  3901.7540   3.9017540      2.4244436
#> 9         3    5           1  6109.9996   6.1099996      3.7965871
#> 10        3    6           2  6177.6741   6.1776741      3.8386383

# How many locations within 5 miles per voter?
table(result_5mi$voter_id)
#> 
#>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 
#>  3  1  9 10  1  1  5  2  1  1  2  9  1  3  2  7  2  6  1  2

Using sf Objects

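If your data are already sf POINT objects, pass them directly — no need to specify coordinate column names. (Note that st_as_sf() takes coordinates in x, y order, i.e. longitude then latitude, which is the reverse of the c("lat", "long") order used in the examples above.)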

library(sf)
#> Warning: package 'sf' was built under R version 4.4.3
#> Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE

voters_sf <- st_as_sf(voter_meck, coords = c("long", "lat"), crs = 4326)
locs_sf   <- st_as_sf(early_meck, coords = c("long", "lat"), crs = 4326)

result_sf <- nearest_location(voters_sf, locs_sf, append_data = FALSE)
head(result_sf)
#>   voter_id distance_m distance_km distance_miles
#> 1        1  2950.3663   2.9503663      1.8332772
#> 2        2  3517.6541   3.5176541      2.1857743
#> 3        3   574.0296   0.5740296      0.3566863
#> 4        4   676.0503   0.6760503      0.4200792
#> 5        5  7223.9220   7.2239220      4.4887482
#> 6        6  7184.1009   7.1841009      4.4640044

If the CRS is not WGS-84 (EPSG:4326), the package automatically transforms to WGS-84 and prints a message.

Convenience Functions

For quick calculations without the full nearest_location() interface:

# Minimum distance in km for each voter
km <- dist_km(voter_meck$lat, voter_meck$long,
              early_meck$lat, early_meck$long)
summary(km)
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#>  0.000352  1.873550  3.170477  3.496236  4.627227 10.400568

# Minimum distance in miles
mi <- dist_mile(voter_meck$lat, voter_meck$long,
                early_meck$lat, early_meck$long)
summary(mi)
#>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#> 0.000219 1.164173 1.970048 2.172466 2.875233 6.462630

# Single-pair distance (e.g., Charlotte to Raleigh)
haversine(35.2271, -80.8431, 35.7796, -78.6382, units = "miles")
#> [1] 129.9045

Performance

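The Haversine computation runs in C++ and uses partial sorting (std::nth_element) for k-nearest queries, giving O(n) average time per voter, where n is the number of locations, instead of the O(n log n) of a full sort. For large voter files, enable progress reporting (big_voter_file and locations below are placeholders for your own data):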

result <- nearest_location(
  big_voter_file, locations,
  voter_coords = c("lat", "lon"),
  location_coords = c("lat", "lon"),
  k = 3,
  progress = TRUE
)