---
title: "Spatial Interpolation"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Spatial Interpolation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: collapse source and output into a
# single block, prefix printed output with "#>", and hide messages/warnings.
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  message = FALSE, warning = FALSE
)
```

Spatial interpolation is the process of transforming data from one spatial resolution to another (e.g. from census tract to ZIP code). The `cincy::interpolate` function uses census block-level weights to interpolate data from one `cincy` geography to another `cincy` geography. Variables prefixed with `n_` will be interpolated extensively (weighted sum), and all other numeric variables will be interpolated intensively (weighted mean).

Here, we interpolate `fraction_poverty` (estimated from the 2018 American Community Survey) from 2010 census tracts to 2010 neighborhoods and to 2010 ZIP Code Tabulation Areas (ZCTAs).

```{r}
library(cincy)
library(dplyr)
library(sf)
library(tmap)
library(tidyr)

# Project `cincy::dep_index` to EPSG:3735 and keep only the tract identifier
# and the variable to be interpolated.
d_tract <- cincy::dep_index |>
  st_transform(3735) |>
  select(census_tract_id_2010, fraction_poverty)
```

```{r}
#| echo: false
#| fig-align: center

# Draw a choropleth of the `fraction_poverty` column of `d`.
#
# d:           spatial object handed to `tm_shape()`; must contain a
#              `fraction_poverty` column.
# legend.show: forwarded to `tm_polygons()`; FALSE hides the legend.
#
# Returns the tmap object (printed when evaluated at top level). The fill
# uses a continuous viridis scale with breaks fixed at 0, 0.2, ..., 1.
make_map <- function(d, legend.show = TRUE) {
  tm_shape(d) +
    tm_polygons(
      "fraction_poverty",
      # Spelled out in full: the abbreviated `pal` only reached `palette`
      # through partial argument matching.
      palette = "viridis",
      style = "cont",
      title = "Fraction Poverty",
      breaks = seq(0, 1, 0.2),
      legend.show = legend.show
    ) +
    tm_layout(
      frame = FALSE,
      legend.outside = TRUE
    )
}

make_map(d_tract)
```

## Interpolating to Neighborhood and ZCTA 

The `cincy::interpolate` function allows the user to choose from three sets of census block-level weights to be used for the weighted sums and/or averages: **population** (`weights = "pop"`), **number of homes** (`weights = "homes"`), and **area** (`weights = "area"`).

**Neighborhood**

```{r}
# Population-weighted interpolation from tracts to neighborhoods,
# projected to EPSG:3735 for mapping.
d_neigh_pop <- cincy::interpolate(
  d_tract,
  to = cincy::neigh_cchmc_2010,
  weights = "pop"
) |>
  st_transform(3735)
```

```{r}
#| echo: false
# Map the population-weighted neighborhood estimates.
d_neigh_pop |> make_map()
```

**ZCTA**

```{r}
# Population-weighted interpolation from tracts to ZCTAs,
# projected to EPSG:3735 for mapping.
d_zcta_pop <- cincy::interpolate(
  d_tract,
  to = cincy::zcta_tigris_2010,
  weights = "pop"
) |>
  st_transform(3735)
```

```{r}
#| echo: false
# Map the population-weighted ZCTA estimates.
d_zcta_pop |> make_map()
```

The above examples use population (`pop`) weights, but `homes` or `area` weights can be used by changing the `weights` argument. The maps below compare the results of all three sets of weights for both target geographies.

```{r}
#| echo: false
# Interpolate the tract-level data to the geography `to` using the given
# `weights`, then project the result to EPSG:3735.
interpolate_tract <- function(to, weights) {
  cincy::interpolate(d_tract, to = to, weights = weights) |>
    st_transform(3735)
}

# Same interpolations as the population-weighted examples, using the other
# two sets of weights.
d_zcta_homes <- interpolate_tract(cincy::zcta_tigris_2010, "homes")
d_zcta_area <- interpolate_tract(cincy::zcta_tigris_2010, "area")

d_neigh_homes <- interpolate_tract(cincy::neigh_cchmc_2010, "homes")
d_neigh_area <- interpolate_tract(cincy::neigh_cchmc_2010, "area")
```

```{r}
#| echo: false

# Build one wide, geometry-free table per target geography with one column of
# interpolated `fraction_poverty` per set of weights (population, homes, area).
# Each input's `fraction_poverty` is renamed before joining so the joined
# columns never collide.
d_neigh <-
  d_neigh_pop |>
  st_drop_geometry() |>
  rename(population = fraction_poverty) |>
  left_join(
    d_neigh_homes |>
      st_drop_geometry() |>
      rename(homes = fraction_poverty),
    by = "neighborhood_2010"
  ) |>
  left_join(
    d_neigh_area |>
      st_drop_geometry() |>
      rename(area = fraction_poverty),
    by = "neighborhood_2010"
  )

d_zcta <-
  d_zcta_pop |>
  st_drop_geometry() |>
  rename(population = fraction_poverty) |>
  left_join(
    d_zcta_homes |>
      st_drop_geometry() |>
      rename(homes = fraction_poverty),
    by = "zcta_2010"
  ) |>
  left_join(
    d_zcta_area |>
      st_drop_geometry() |>
      rename(area = fraction_poverty),
    by = "zcta_2010"
  )
```

```{r}
#| echo: false
# Reshape the wide per-weight tables to long form (one row per geography unit
# and set of weights), then re-attach the polygons so the values can be mapped.
d_neigh_long <- d_neigh |>
  pivot_longer(
    cols = !neighborhood_2010,
    names_to = "weights",
    values_to = "fraction_poverty"
  )

d_neigh_long <- cincy::neigh_cchmc_2010 |>
  left_join(d_neigh_long, by = "neighborhood_2010") |>
  st_transform(3735) |>
  mutate(geography = "neighborhood") |>
  rename(id = neighborhood_2010)

d_zcta_long <- d_zcta |>
  pivot_longer(
    cols = !zcta_2010,
    names_to = "weights",
    values_to = "fraction_poverty"
  )

d_zcta_long <- cincy::zcta_tigris_2010 |>
  left_join(d_zcta_long, by = "zcta_2010") |>
  st_transform(3735) |>
  mutate(geography = "zcta") |>
  rename(id = zcta_2010)

# Stack both geographies; the shared `id` and `geography` columns let a single
# faceted map show every geography/weights combination.
d_long <- bind_rows(d_neigh_long, d_zcta_long)

tm_shape(d_long) +
  tm_polygons(
    "fraction_poverty",
    # Spelled out in full: the abbreviated `pal` only reached `palette`
    # through partial argument matching.
    palette = "viridis",
    style = "cont",
    title = "Fraction Poverty",
    breaks = seq(0, 1, 0.2)
  ) +
  tm_facets(
    by = c("geography", "weights"),
    free.scales.fill = FALSE
  ) +
  tm_layout(
    frame = FALSE,
    legend.show = FALSE
  )
```

Note that we lose variability when interpolating from a smaller geography to a larger geography. This is especially noticeable when interpolating from tract to ZCTA. 

Also note that all three sets of weights yield similar results in this example. However, the result of using `area` weights differs slightly from the results of using `homes` and `population` weights. This conclusion is further supported by the pairwise correlations between the interpolated `fraction_poverty` values obtained with the three sets of weights, shown below.

**Neighborhood `fraction_poverty` interpolated from tract using `population`, `homes`, and `area` weights**

```{r}
#| echo: false
library(GGally)
# Pairwise comparison of the three weighting schemes (identifier dropped).
d_neigh |>
  select(-neighborhood_2010) |>
  ggpairs()
```

**ZCTA `fraction_poverty` interpolated from tract using `population`, `homes`, and `area` weights**

```{r}
#| echo: false
# Pairwise comparison of the three weighting schemes (identifier dropped).
d_zcta |>
  select(-zcta_2010) |>
  ggpairs()
```