---
title: "Spatial Interpolation"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Spatial Interpolation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: collapse output into the source
# block, prefix output lines with "#>", and hide messages and warnings.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)
```

Spatial interpolation is the process of transforming data from one spatial resolution to another (e.g. from census tract to ZIP code). The `cincy::interpolate` function uses census block-level weights to interpolate data from one `cincy` geography to another `cincy` geography.

Variables prefixed with `n_` will be interpolated extensively (weighted sum), and all other numeric variables will be interpolated intensively (weighted mean).

Here, we interpolate `fraction_poverty` from the 2018 American Community Survey from 2010 census tracts to 2010 Neighborhoods and 2010 ZCTAs.

```{r}
library(cincy)
library(dplyr)
library(sf)
library(tmap)
library(tidyr)

# Tract-level input: reproject to CRS 3735 and keep only the tract id and
# the variable being interpolated.
d_tract <- cincy::dep_index |>
  st_transform(3735) |>
  select(census_tract_id_2010, fraction_poverty)
```

```{r}
#| echo: false
#| fig-align: center
# Draw a choropleth of `fraction_poverty` for the polygons in `d`.
# `legend.show` is forwarded to `tm_polygons()` to toggle the legend.
make_map <- function(d, legend.show = TRUE) {
  poverty_layer <- tm_polygons(
    "fraction_poverty",
    pal = "viridis",
    style = "cont",
    title = "Fraction Poverty",
    breaks = seq(0, 1, 0.2),
    legend.show = legend.show
  )
  map_layout <- tm_layout(frame = FALSE, legend.outside = TRUE)

  tm_shape(d) + poverty_layer + map_layout
}

make_map(d_tract)
```

## Interpolating to Neighborhood and ZCTA

The `cincy::interpolate` function allows the user to choose from three sets of weights to be used for the weighted sums and/or averages: block group **population**, block group **number of homes**, and block group **area**.
**Neighborhood**

```{r}
d_neigh_pop <- cincy::interpolate(
  d_tract,
  to = cincy::neigh_cchmc_2010,
  weights = "pop"
) |>
  st_transform(3735)
```

```{r}
#| echo: false
make_map(d_neigh_pop)
```

**ZCTA**

```{r}
d_zcta_pop <- cincy::interpolate(
  d_tract,
  to = cincy::zcta_tigris_2010,
  weights = "pop"
) |>
  st_transform(3735)
```

```{r}
#| echo: false
make_map(d_zcta_pop)
```

The above examples use population (`pop`) weights, but `homes` or `area` weights can be used by changing the `weights` argument.

```{r}
#| echo: false
# Repeat both interpolations with the two remaining weight sets so the
# three weightings can be compared side by side below.
d_zcta_homes <- st_transform(
  cincy::interpolate(d_tract, to = cincy::zcta_tigris_2010, weights = "homes"),
  3735
)

d_zcta_area <- st_transform(
  cincy::interpolate(d_tract, to = cincy::zcta_tigris_2010, weights = "area"),
  3735
)

d_neigh_homes <- st_transform(
  cincy::interpolate(d_tract, to = cincy::neigh_cchmc_2010, weights = "homes"),
  3735
)

d_neigh_area <- st_transform(
  cincy::interpolate(d_tract, to = cincy::neigh_cchmc_2010, weights = "area"),
  3735
)
```

```{r}
#| echo: false
# Wide comparison tables without geometry: one row per target geography and
# one column per weighting. Each join brings in a fresh `fraction_poverty`
# column, which is renamed immediately so the next join does not collide.
d_neigh <- st_drop_geometry(d_neigh_pop) |>
  rename(population = fraction_poverty) |>
  left_join(st_drop_geometry(d_neigh_homes), by = "neighborhood_2010") |>
  rename(homes = fraction_poverty) |>
  left_join(st_drop_geometry(d_neigh_area), by = "neighborhood_2010") |>
  rename(area = fraction_poverty)

d_zcta <- st_drop_geometry(d_zcta_pop) |>
  rename(population = fraction_poverty) |>
  left_join(st_drop_geometry(d_zcta_homes), by = "zcta_2010") |>
  rename(homes = fraction_poverty) |>
  left_join(st_drop_geometry(d_zcta_area), by = "zcta_2010") |>
  rename(area = fraction_poverty)
```

```{r}
#| echo: false
# Reshape each wide table to long form (one row per geography x weighting),
# re-attach the polygons, and give both geographies a shared `id` column so
# they can be stacked and faceted together.
d_neigh_long <- pivot_longer(
  d_neigh,
  cols = !neighborhood_2010,
  names_to = "weights",
  values_to = "fraction_poverty"
)

d_neigh_long <- cincy::neigh_cchmc_2010 |>
  left_join(d_neigh_long, by = "neighborhood_2010") |>
  st_transform(3735) |>
  mutate(geography = "neighborhood") |>
  rename(id = neighborhood_2010)

d_zcta_long <- pivot_longer(
  d_zcta,
  cols = !zcta_2010,
  names_to = "weights",
  values_to = "fraction_poverty"
)

d_zcta_long <- cincy::zcta_tigris_2010 |>
  left_join(d_zcta_long, by = "zcta_2010") |>
  st_transform(3735) |>
  mutate(geography = "zcta") |>
  rename(id = zcta_2010)

d_long <- bind_rows(d_neigh_long, d_zcta_long)

# One panel per geography x weighting, all sharing a single fill scale.
tm_shape(d_long) +
  tm_polygons(
    "fraction_poverty",
    pal = "viridis",
    style = "cont",
    title = "Fraction Poverty",
    breaks = seq(0, 1, 0.2)
  ) +
  tm_facets(by = c("geography", "weights"), free.scales.fill = FALSE) +
  tm_layout(frame = FALSE, legend.show = FALSE)
```

Note that we lose variability when interpolating from a smaller geography to a larger geography. This is especially noticeable when interpolating from tract to ZCTA.

Also note that all three weights yield similar results in this example. However, the result of using `area` weights varies slightly compared to `homes` and `population`. This conclusion is further supported by the correlation between interpolated `fraction_poverty` values using the three sets of weights.

**Neighborhood `fraction_poverty` interpolated from tract using `population`, `homes`, and `area` weights**

```{r}
#| echo: false
library(GGally)

# Pairwise comparison of the three weightings; the id column is dropped first.
ggpairs(select(d_neigh, -neighborhood_2010))
```

**ZCTA `fraction_poverty` interpolated from tract using `population`, `homes`, and `area` weights**

```{r}
#| echo: false
# Same pairwise comparison for the ZCTA-level results.
ggpairs(select(d_zcta, -zcta_2010))
```