Week3_Handon

Getting Started

pacman::p_load(sf, tmap, tidyverse)

Importing Geo-spatial Data into R

mpsz <- st_read(dsn = "Data/geospatial", 
                layer = "MP14_SUBZONE_WEB_PL")
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `C:\Quanfang777\IS415-GAA\WeeklyExercise\week3\Data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21

Examine the content

mpsz
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
First 10 features:
   OBJECTID SUBZONE_NO       SUBZONE_N SUBZONE_C CA_IND      PLN_AREA_N
1         1          1    MARINA SOUTH    MSSZ01      Y    MARINA SOUTH
2         2          1    PEARL'S HILL    OTSZ01      Y          OUTRAM
3         3          3       BOAT QUAY    SRSZ03      Y SINGAPORE RIVER
4         4          8  HENDERSON HILL    BMSZ08      N     BUKIT MERAH
5         5          3         REDHILL    BMSZ03      N     BUKIT MERAH
6         6          7  ALEXANDRA HILL    BMSZ07      N     BUKIT MERAH
7         7          9   BUKIT HO SWEE    BMSZ09      N     BUKIT MERAH
8         8          2     CLARKE QUAY    SRSZ02      Y SINGAPORE RIVER
9         9         13 PASIR PANJANG 1    QTSZ13      N      QUEENSTOWN
10       10          7       QUEENSWAY    QTSZ07      N      QUEENSTOWN
   PLN_AREA_C       REGION_N REGION_C          INC_CRC FMEL_UPD_D   X_ADDR
1          MS CENTRAL REGION       CR 5ED7EB253F99252E 2014-12-05 31595.84
2          OT CENTRAL REGION       CR 8C7149B9EB32EEFC 2014-12-05 28679.06
3          SR CENTRAL REGION       CR C35FEFF02B13E0E5 2014-12-05 29654.96
4          BM CENTRAL REGION       CR 3775D82C5DDBEFBD 2014-12-05 26782.83
5          BM CENTRAL REGION       CR 85D9ABEF0A40678F 2014-12-05 26201.96
6          BM CENTRAL REGION       CR 9D286521EF5E3B59 2014-12-05 25358.82
7          BM CENTRAL REGION       CR 7839A8577144EFE2 2014-12-05 27680.06
8          SR CENTRAL REGION       CR 48661DC0FBA09F7A 2014-12-05 29253.21
9          QT CENTRAL REGION       CR 1F721290C421BFAB 2014-12-05 22077.34
10         QT CENTRAL REGION       CR 3580D2AFFBEE914C 2014-12-05 24168.31
     Y_ADDR SHAPE_Leng SHAPE_Area                       geometry
1  29220.19   5267.381  1630379.3 MULTIPOLYGON (((31495.56 30...
2  29782.05   3506.107   559816.2 MULTIPOLYGON (((29092.28 30...
3  29974.66   1740.926   160807.5 MULTIPOLYGON (((29932.33 29...
4  29933.77   3313.625   595428.9 MULTIPOLYGON (((27131.28 30...
5  30005.70   2825.594   387429.4 MULTIPOLYGON (((26451.03 30...
6  29991.38   4428.913  1030378.8 MULTIPOLYGON (((25899.7 297...
7  30230.86   3275.312   551732.0 MULTIPOLYGON (((27746.95 30...
8  30222.86   2208.619   290184.7 MULTIPOLYGON (((29351.26 29...
9  29893.78   6571.323  1084792.3 MULTIPOLYGON (((20996.49 30...
10 30104.18   3454.239   631644.3 MULTIPOLYGON (((24472.11 29...

Importing Attribute Data into R

popdata <- read_csv("Data/aspatial/respopagesextod2011to2020.csv")
Rows: 984656 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): PA, SZ, AG, Sex, TOD
dbl (2): Pop, Time

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Preparation

popdata2020 <- popdata %>%
  filter(Time == 2020) %>%
  group_by(PA, SZ, AG)%>%
  summarise(`POP` = sum(`Pop`))%>% ungroup()%>%
  pivot_wider(names_from=AG, 
              values_from=POP) %>% mutate(YOUNG = rowSums(.[3:6])
         +rowSums(.[12])) %>%
mutate(`ECONOMY ACTIVE` = rowSums(.[7:11])+
rowSums(.[13:15]))%>%mutate(`AGED`=rowSums(.[16:21])) %>%
mutate(`TOTAL`=rowSums(.[3:21])) %>%  
mutate(`DEPENDENCY` = (`YOUNG` + `AGED`)
/`ECONOMY ACTIVE`) %>%  select(`PA`, `SZ`, `YOUNG`, 
       `ECONOMY ACTIVE`, `AGED`, 
       `TOTAL`, `DEPENDENCY`)
`summarise()` has grouped output by 'PA', 'SZ'. You can override using the
`.groups` argument.
popdata2020
# A tibble: 332 × 7
   PA         SZ                     YOUNG `ECONOMY ACTIVE`  AGED TOTAL DEPEND…¹
   <chr>      <chr>                  <dbl>            <dbl> <dbl> <dbl>    <dbl>
 1 Ang Mo Kio Ang Mo Kio Town Centre  1290             2760   760  4810    0.743
 2 Ang Mo Kio Cheng San               5640            16460  6050 28150    0.710
 3 Ang Mo Kio Chong Boon              5100            15000  6470 26570    0.771
 4 Ang Mo Kio Kebun Bahru             4620            13010  5120 22750    0.749
 5 Ang Mo Kio Sembawang Hills         1880             3630  1310  6820    0.879
 6 Ang Mo Kio Shangri-La              3330             9050  3610 15990    0.767
 7 Ang Mo Kio Tagore                  1940             4480  1530  7950    0.775
 8 Ang Mo Kio Townsville              4190            11950  5100 21240    0.777
 9 Ang Mo Kio Yio Chu Kang               0                0     0     0  NaN    
10 Ang Mo Kio Yio Chu Kang East       1110             2410   750  4270    0.772
# … with 322 more rows, and abbreviated variable name ¹​DEPENDENCY

Joining the attribute data and geospatial data

popdata2020 <- popdata2020 %>%
  mutate_at(.vars = vars(PA, SZ), 
          .funs = funs(toupper)) %>%
  filter(`ECONOMY ACTIVE` > 0)
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
mpsz_pop2020 <- left_join(mpsz, popdata2020,
                          by = c("SUBZONE_N" = "SZ"))
write_rds(mpsz_pop2020, "Data/rds/mpszpop2020.rds")

Choropleth Mapping

tmap_mode("plot")
tmap mode set to plotting
qtm(mpsz_pop2020, 
    fill = "DEPENDENCY")

tm_shape(mpsz_pop2020)+
  tm_fill("DEPENDENCY", 
          style = "quantile", 
          palette = "Blues",
          title = "Dependency ratio") +
  tm_layout(main.title = "Distribution of Dependency Ratio by planning subzone",
            main.title.position = "center",
            main.title.size = 1.2,
            legend.height = 0.45, 
            legend.width = 0.35,
            frame = TRUE) +
  tm_borders(alpha = 0.5) +
  tm_compass(type="8star", size = 2) +
  tm_scale_bar() +
  tm_grid(alpha =0.2) +
  tm_credits("Source: Planning Sub-zone boundary from Urban Redevelopment Authorithy (URA)\n and Population data from Department of Statistics DOS", 
             position = c("left", "bottom"))

Drawing a base map

tm_shape(mpsz_pop2020)+
  tm_fill("DEPENDENCY",
          n = 20,
          style = "jenks") +
  tm_borders(alpha = 0.5)

Get some descriptive statistics

summary(mpsz_pop2020$DEPENDENCY)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.6519  0.7025  0.7742  0.7645 19.0000      92 
tm_shape(mpsz_pop2020)+
  tm_fill("DEPENDENCY",
          breaks = c(0, 0.60, 0.70, 0.80, 0.90, 1.00)) +
  tm_borders(alpha = 0.5)
Warning: Values have found that are higher than the highest break

tm_shape(mpsz_pop2020)+
  tm_fill("DEPENDENCY",
          style = "quantile",
          palette = "-Greens") +
  tm_borders(alpha = 0.5)

Map Layouts

tm_shape(mpsz_pop2020)+
  tm_fill("DEPENDENCY", 
          style = "jenks", 
          palette = "Blues", 
          legend.hist = TRUE, 
          legend.is.portrait = TRUE,
          legend.hist.z = 0.1) +
  tm_layout(main.title = "Distribution of Dependency Ratio by planning subzone \n(Jenks classification)",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE) +
  tm_borders(alpha = 0.5)

tm_shape(mpsz_pop2020)+
  tm_fill(c("YOUNG", "AGED"),
          style = "equal", 
          palette = "Blues") +
  tm_layout(legend.position = c("right", "bottom")) +
  tm_borders(alpha = 0.5) +
  tmap_style("white")
tmap style set to "white"
other available styles are: "gray", "natural", "cobalt", "col_blind", "albatross", "beaver", "bw", "classic", "watercolor"