# Load all packages needed for this analysis; pacman::p_load() installs any
# missing packages before attaching them.
# (Extraction artifact fixed: "pacman" had been displaced to the line end.)
pacman::p_load(olsrr, corrplot, sf, spdep, GWmodel, tmap, tidyverse,
               gtsummary, readxl, jsonlite, rvest)
Takehome_Ex03
Take-home Exercise 3: Predicting HDB Public Housing Resale Prices using Geographically Weighted Methods
Background
Housing is an essential component of household wealth worldwide. Buying a house has always been a major investment for most people. The price of housing is affected by many factors. Some of them are global in nature, such as the general economy of a country or the inflation rate. Others can be more specific to the properties themselves. These factors can be further divided into structural and locational factors. Structural factors are variables related to the property itself, such as the size, fitting, and tenure of the property. Locational factors are variables related to the neighbourhood of the property, such as proximity to childcare centres, public transport services and shopping centres.
Conventionally, housing resale price predictive models were built using the Ordinary Least Square (OLS) method. However, this method fails to take into consideration the spatial autocorrelation and spatial heterogeneity that exist in geographic data sets such as housing transactions. With the existence of spatial autocorrelation, the OLS estimation of predictive housing resale pricing models could lead to biased, inconsistent, or inefficient results (Anselin 1998). In view of this limitation, Geographically Weighted Models were introduced for calibrating predictive models for housing resale prices.
Objective
To predict HDB resale prices at the sub-market level for the months of January and February 2023 in Singapore. The predictive models will be built using the conventional OLS method and GWR methods. The objective is to compare the performance of the conventional OLS method versus the geographically weighted methods.
Concept for GWR, Hedonic pricing model
Geographically weighted regression (GWR) is a spatial statistical technique that takes non-stationary variables into consideration (e.g., climate; demographic factors; physical environment characteristics)
In this take-home assignment, we need to build a predictive model using GWR. The predictive model needs to take into consideration locational factors such as proximity to childcare centres, public transport services and shopping centres.
Hence, we first have to identify the relevant locational factors for this assignment, which include:
Proximity to CBD
Proximity to eldercare
Proximity to foodcourt/hawker centres
Proximity to MRT
Proximity to park
Proximity to good primary school
Proximity to shopping mall
Proximity to supermarket
Numbers of kindergartens within 350m
Numbers of childcare centres within 350m
Numbers of bus stop within 350m
Numbers of primary school within 1km
Steps
Load Necessary Library
Importing geospatial data (MP2019)
# Import the Master Plan 2019 subzone boundary shapefile as an sf object.
# (Extraction artifact fixed: the assignment target "mpsz2019" had been
# displaced; also use "<-" rather than "=" for assignment.)
mpsz2019 <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019")
Reading layer `MPSZ-2019' from data source
`C:\Quanfang777\IS415-GAA\Takehome_Exercise\Takehome_Exercise3\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
Updating CRS information
# Project from geodetic WGS 84 to SVY21 / Singapore TM (EPSG:3414), the
# projected CRS used for all distance calculations in this exercise.
mpsz_svy21 <- st_transform(mpsz2019, 3414)
check whether the CRS has correctly assigned:
# Confirm the CRS is now EPSG:3414 (SVY21 / Singapore TM).
st_crs(mpsz_svy21)
Coordinate Reference System:
User input: EPSG:3414
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
Check invalid geometries
# Count geometries that fail sf's validity check (output below shows 6).
length(which(st_is_valid(mpsz_svy21) == FALSE))
[1] 6
Make the invalid geometries valid
# Repair the invalid geometries in place, then re-check that none remain.
# (Extraction artifact fixed: "mpsz_svy21" had been displaced to the next line.)
mpsz_svy21 <- st_make_valid(mpsz_svy21)
length(which(st_is_valid(mpsz_svy21) == FALSE))
[1] 0
Aspatial Data Wrangling
Importing the aspatial data
# Import the HDB resale transaction records (aspatial CSV).
# (Extraction artifact fixed: the assignment target "resale" had been
# displaced; also use "<-" rather than "=" for assignment.)
resale <- read_csv("data/aspatial/resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv")
Rows: 148277 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): month, town, flat_type, block, street_name, storey_range, flat_mode...
dbl (3): floor_area_sqm, lease_commence_date, resale_price
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
We plan to look at four-room flats and a transaction period between 1st January 2021 and 31st December 2022.
# Keep only 4-room flat transactions, then print the result.
# NOTE(review): the text above states a Jan 2021 - Dec 2022 transaction
# window, but no filter on `month` is applied here - the printed tibble
# still starts at 2017-01. Presumably the date restriction happens during
# geocoding; confirm, or add e.g.
#   filter(month >= "2021-01", month <= "2022-12")
resale <- resale %>%
  filter(flat_type == "4 ROOM")
resale
# A tibble: 61,961 × 11
month town flat_…¹ block stree…² store…³ floor…⁴ flat_…⁵ lease…⁶ remai…⁷
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr>
1 2017-01 ANG MO… 4 ROOM 472 ANG MO… 10 TO … 92 New Ge… 1979 61 yea…
2 2017-01 ANG MO… 4 ROOM 475 ANG MO… 07 TO … 91 New Ge… 1979 61 yea…
3 2017-01 ANG MO… 4 ROOM 629 ANG MO… 01 TO … 94 New Ge… 1981 63 yea…
4 2017-01 ANG MO… 4 ROOM 546 ANG MO… 01 TO … 92 New Ge… 1981 63 yea…
5 2017-01 ANG MO… 4 ROOM 131 ANG MO… 01 TO … 98 New Ge… 1979 61 yea…
6 2017-01 ANG MO… 4 ROOM 254 ANG MO… 04 TO … 97 New Ge… 1977 59 yea…
7 2017-01 ANG MO… 4 ROOM 470 ANG MO… 04 TO … 92 New Ge… 1979 61 yea…
8 2017-01 ANG MO… 4 ROOM 601 ANG MO… 04 TO … 91 New Ge… 1980 62 yea…
9 2017-01 ANG MO… 4 ROOM 463 ANG MO… 04 TO … 92 New Ge… 1980 62 yea…
10 2017-01 ANG MO… 4 ROOM 207 ANG MO… 04 TO … 97 New Ge… 1976 58 yea…
# … with 61,951 more rows, 1 more variable: resale_price <dbl>, and abbreviated
# variable names ¹flat_type, ²street_name, ³storey_range, ⁴floor_area_sqm,
# ⁵flat_model, ⁶lease_commence_date, ⁷remaining_lease
However, when we look at the code, there is no Longitude and Latitude Information, which means we need to geocode it, to make sure we get accurate geocode result, we have to combine the Block and Street Name as input:
In OneMapSG API, “ST.” is usually written as “SAINT” instead, so it is better for us to replace ST. with SAINT
# OneMapSG writes "ST." as "SAINT"; normalise street names so geocoding
# matches (the "." is escaped because gsub's pattern is a regex).
resale$street_name <- gsub("ST\\.", "SAINT", resale$street_name)
Here is the code for geocoding the aspatial data: (Reference was taken from the senior sample submissions for the code for this section, with credit to MEGAN SIM TZE YEN)
#
# library(httr)
# geocode <- function(block, streetname) {
# base_url <- "https://developers.onemap.sg/commonapi/search"
# address <- paste(block, streetname, sep = " ")
# query <- list("searchVal" = address,
# "returnGeom" = "Y",
# "getAddrDetails" = "N",
# "pageNum" = "1")
#
# res <- GET(base_url, query = query)
# restext<-content(res, as="text")
#
# output <- fromJSON(restext) %>%
# as.data.frame %>%
# select(results.LATITUDE, results.LONGITUDE)
#
# return(output)
# }
# resale$LATITUDE <- 0
# resale$LONGITUDE <- 0
#
# for (i in 1:nrow(resale)){
# temp_output <- geocode(resale[i, 4], resale[i, 5])
#
# resale$LATITUDE[i] <- temp_output$results.LATITUDE
# resale$LONGITUDE[i] <- temp_output$results.LONGITUDE}
Make a copy for the geocoded data:
Make a copy for the geocoded data so we don’t need to run the previous process again:
#write_csv(resale, "data/rds/resalecopy1.csv")
Read the geocoded data
# Re-import the saved geocoded copy (23,656 rows with LATITUDE/LONGITUDE)
# so the slow OneMap geocoding loop never has to be re-run.
resale <- read_csv("data/rds/resalecopy1.csv")
Rows: 23656 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): month, town, flat_type, block, street_name, storey_range, flat_mode...
dbl (5): floor_area_sqm, lease_commence_date, resale_price, LATITUDE, LONGITUDE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Update CRS
# Convert the geocoded records to sf POINT geometry: coordinates are in
# WGS 84 (EPSG:4326) lat/lng, then projected to SVY21 / Singapore TM.
resale.sf <- st_as_sf(resale,
                      coords = c("LONGITUDE", "LATITUDE"),
                      crs = 4326) %>%
  st_transform(crs = 3414)
Structural Factors
FLOOR LEVEL
# List the distinct storey-range categories present in the data (17 total).
unique(resale$storey_range)
[1] "04 TO 06" "01 TO 03" "07 TO 09" "10 TO 12" "13 TO 15" "16 TO 18"
[7] "19 TO 21" "22 TO 24" "28 TO 30" "25 TO 27" "34 TO 36" "31 TO 33"
[13] "43 TO 45" "37 TO 39" "40 TO 42" "46 TO 48" "49 TO 51"
We can observe that there are 17 categories of storey ranges. To represent each range, we will use the pivot_wider() function to create duplicate variables, assigning a value of 1 if the observation falls within the range and 0 if it does not.
# One-hot encode the 17 storey-range categories: each range becomes its own
# column holding 1 when the transaction falls in that range, 0 otherwise.
resale <- resale %>%
  pivot_wider(names_from = storey_range, values_from = storey_range,
              values_fn = list(storey_range = ~1), values_fill = 0)
REMAINING LEASE
The current format of the remaining_lease variable is a string, but we need it to be in numeric format. To achieve this, we can split the string into its month and year values, calculate the remaining lease in years, and replace the original string values with the calculated numeric values.
# remaining_lease is a string such as "61 years 04 months" or "61 years".
# Split each value on spaces and convert to numeric years, expressing any
# months component as a fraction of a year rounded to 2 decimal places.
str_list <- str_split(resale$remaining_lease, " ")

for (i in seq_along(str_list)) {
  parts <- unlist(str_list[i])
  if (length(parts) > 2) {
    # "YY years MM months": parts[1] = years, parts[3] = months
    year  <- as.numeric(parts[1])
    month <- as.numeric(parts[3])
    resale$remaining_lease[i] <- year + round(month / 12, 2)
  } else {
    # "YY years": no months component
    year <- as.numeric(parts[1])
    resale$remaining_lease[i] <- year
  }
}
# Rebuild the sf object from the updated data frame (post dummy-encoding and
# lease conversion), again projecting from WGS 84 to SVY21 / Singapore TM.
resale_sf <- st_as_sf(resale,
                      coords = c("LONGITUDE", "LATITUDE"),
                      crs = 4326) %>%
  st_transform(crs = 3414)
Locational Factor
CBD
As proximity to the CBD is one of the locational factors we are interested in for improving our predictive model, let us take the coordinates of the Downtown Core as the coordinates of the CBD.
# Build a single-point sf object for the CBD, using the Downtown Core
# coordinates (WGS 84), projected to SVY21 / Singapore TM.
lat <- 1.287953
lng <- 103.851784

cbd_sf <- data.frame(lat, lng) %>%
  st_as_sf(coords = c("lng", "lat"), crs = 4326) %>%
  st_transform(crs = 3414)
Eldercare
# Import the eldercare services point shapefile.
Eldercare_sf <- st_read(dsn = "data/geospatial/eldercare-services-shp",
                        layer = "ELDERCARE")
Reading layer `ELDERCARE' from data source
`C:\Quanfang777\IS415-GAA\Takehome_Exercise\Takehome_Exercise3\data\geospatial\eldercare-services-shp'
using driver `ESRI Shapefile'
Simple feature collection with 133 features and 18 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 14481.92 ymin: 28218.43 xmax: 41665.14 ymax: 46804.9
Projected CRS: SVY21
# Inspect the layer's CRS - it reports plain "SVY21" (no EPSG code), so an
# explicit st_transform to EPSG:3414 follows.
st_crs(Eldercare_sf)
Coordinate Reference System:
User input: SVY21
wkt:
PROJCRS["SVY21",
BASEGEOGCRS["SVY21[WGS84]",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["Degree",0.0174532925199433]]],
CONVERSION["unnamed",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["(E)",east,
ORDER[1],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]],
AXIS["(N)",north,
ORDER[2],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]]]
# Assign the proper EPSG:3414 (SVY21 / Singapore TM) CRS to the layer.
# (Extraction artifact fixed: "Eldercare_sf" had been displaced to line end.)
Eldercare_sf <- st_transform(Eldercare_sf, crs = 3414)
Kindergartens
To get the Kindergartens’ geometry data, I refer to Megan’s method of using ONEMAP API to do the geocoding
The step including:
- Get the token from ONEMAP API
- Search the themes and check the themes available
- Using get_theme function to get the geospatial information of the Kindergarten
# library(sf)
# library(onemapsgapi)
#
# token <- "<YOUR_ONEMAP_API_TOKEN>"  # NOTE(review): never commit a real API token; load it from an environment variable instead
# search_themes(token, "kindergartens")
# Kindergartens_tibble <- get_theme(token, "kindergartens")
# Kindergartens_sf <- st_as_sf(Kindergartens_tibble, coords=c("Lng", "Lat"), crs=4326)
# Kindergartens_sf <- st_transform(Kindergartens_sf, crs=3414)
Childcare Center
# search_themes(token, "childcare")
# library(sf)
# library(onemapsgapi)
#
# themetibble <- get_theme(token, "childcare")
# childcaresf <- st_as_sf(themetibble, coords=c("Lng", "Lat"), crs=4326)
# childcaresf <- st_transform(childcaresf, crs=3414)
Park
# search_themes(token, "parks")
# library(sf)
# library(onemapsgapi)
# Parks_themetibble <- get_theme(token, "nationalparks")
# Parks_sf <- st_as_sf(Parks_themetibble, coords=c("Lng", "Lat"), crs=4326)
# Parks_sf <- st_transform(Parks_sf, crs=3414)
Shopping mall
(Reference was taken from the senior sample submissions for the code for this section, with credit to NOR AISYAH BINTE AJIT) The approach is about extracting the names of the malls from Wikipedia and then use get_coords function to obtain the respective coordinates
The following code chunk performs three steps:
Reads the Wikipedia HTML page containing the Shopping Malls in Singapore.
Reads the text portion of the unordered list element selected by html_nodes().
Appends the extracted mall names to an empty mall_list.
# get_coords <- function(add_list){
#
# # Create a data frame to store all retrieved coordinates
# postal_coords <- data.frame()
#
# for (i in add_list){
# #print(i)
#
# r <- GET('https://developers.onemap.sg/commonapi/search?',
# query=list(searchVal=i,
# returnGeom='Y',
# getAddrDetails='Y'))
# data <- fromJSON(rawToChar(r$content))
# found <- data$found
# res <- data$results
#
# # Create a new data frame for each address
# new_row <- data.frame()
#
# # If single result, append
# if (found == 1){
# postal <- res$POSTAL
# lat <- res$LATITUDE
# lng <- res$LONGITUDE
# new_row <- data.frame(address= i, postal = postal, latitude = lat, longitude = lng)
# }
#
# # If multiple results, drop NIL and append top 1
# else if (found > 1){
# # Remove those with NIL as postal
# res_sub <- res[res$POSTAL != "NIL", ]
#
# # Set as NA first if no Postal
# if (nrow(res_sub) == 0) {
# new_row <- data.frame(address= i, postal = NA, latitude = NA, longitude = NA)
# }
#
# else{
# top1 <- head(res_sub, n = 1)
# postal <- top1$POSTAL
# lat <- top1$LATITUDE
# lng <- top1$LONGITUDE
# new_row <- data.frame(address= i, postal = postal, latitude = lat, longitude = lng)
# }
# }
#
# else {
# new_row <- data.frame(address= i, postal = NA, latitude = NA, longitude = NA)
# }
#
# # Add the row
# postal_coords <- rbind(postal_coords, new_row)
# }
# return(postal_coords)
# }
# url <- "https://en.wikipedia.org/wiki/List_of_shopping_malls_in_Singapore"
# malls_list <- list()
#
# for (i in 2:7){
# malls <- read_html(url) %>%
# html_nodes(xpath = paste('//*[@id="mw-content-text"]/div[1]/div[',as.character(i),']/ul/li',sep="") ) %>%
# html_text()
# malls_list <- append(malls_list, malls)
# }
# library(httr)
# malls_list_coords <- get_coords(malls_list) %>%
# rename("mall_name" = "address")
Some of the names of the shopping malls in our dataset are not up-to-date, which can cause issues in the analysis. For example, POMO has been renamed to GR.ID and OD Mall has been renamed to The Grandstand. Hence, we should address these issues before proceeding. One of the shopping malls, Yew Tee Shopping Centre, was not found when researched further, so we should remove this row from our dataset
# malls_list_coords <- subset(malls_list_coords, mall_name!= "Yew Tee Shopping Centre")
# invalid_malls<- subset(malls_list_coords, is.na(malls_list_coords$postal))
# invalid_malls_list <- unique(invalid_malls$mall_name)
# corrected_malls <- c("Clarke Quay", "City Gate", "Raffles Holland V", "Knightsbridge", "Mustafa Centre", "GR.ID", "Shaw House",
# "The Poiz Centre", "Velocity @ Novena Square", "Singapore Post Centre", "PLQ Mall", "KINEX", "The Grandstand")
#
# for (i in 1:length(invalid_malls_list)) {
# malls_list_coords <- malls_list_coords %>%
# mutate(mall_name = ifelse(as.character(mall_name) == invalid_malls_list[i], corrected_malls[i], as.character(mall_name)))
# }
Then create a list to store unique names
# malls_list <- sort(unique(malls_list_coords$mall_name))
To retrieve the coordinates of shopping malls
# malls_coords <- get_coords(malls_list)
# malls_coords[(is.na(malls_coords$postal) | is.na(malls_coords$latitude) | is.na(malls_coords$longitude)), ]
# malls_sf <- st_as_sf(malls_coords,
# coords = c("longitude",
# "latitude"),
# crs=4326) %>%
# st_transform(crs = 3414)
Primary School
# pri_sch <- read_csv("data/geospatial/general-information-of-schools.csv")
# pri_sch <- pri_sch %>%
# filter(mainlevel_code == "PRIMARY"| mainlevel_code == "MIXED LEVELS") %>%
# select(school_name, address, postal_code, mainlevel_code)
# prisch_list <- sort(unique(pri_sch$postal_code))
# prisch_coords <- get_coords(prisch_list)
# prisch_coords[(is.na(prisch_coords$postal) | is.na(prisch_coords$latitude) | is.na(prisch_coords$longitude)), ]
# prisch_coords = prisch_coords[c("postal","latitude", "longitude")]
# pri_sch <- left_join(pri_sch, prisch_coords, by = c('postal_code' = 'postal'))
# prisch_sf <- st_as_sf(pri_sch,
# coords = c("longitude",
# "latitude"),
# crs=4326) %>%
# st_transform(crs = 3414)
Select top 10 school
I refer to this website and check which are the exact top 10 primary schools and get their postal code, then I filtered them from the primary school dataset
# postal_codes <- c(599986, 449761, 597610, 536451, 579767, 128806, 569405, 738907, 579646, 227988)
#
# # Filter prisch_sf for rows with postal code in the vector
# top10prisch_sf <- prisch_sf %>%
# filter(postal_code %in% postal_codes)
# top10prisch_sf
Hawker Center
# hawker_sf <- st_read("data/geospatial/hawker-centres-kml.kml")
# hawker_sf <- st_transform(hawker_sf, crs=3414)
# st_crs(hawker_sf)
Bus Stop
# bus_sf <- st_read(dsn="data/geospatial/BusStop_Feb2023", layer="BusStop")
# bus_sf <- st_transform(bus_sf, crs=3414)
Mrt
# rail_network_sf <- st_read(dsn="data/geospatial/TrainStation_Feb2023", layer="RapidTransitSystemStation")
# rail_network_sf<- st_transform(rail_network_sf, crs=3414)
SuperMarket
# supermkt_sf <- st_read("data/geospatial/supermarkets-geojson.geojson")
# supermkt_sf<- st_transform(supermkt_sf, crs=3414)
Check invalid geometry for locational factor
Bus stop
# length(which(st_is_valid(bus_sf) == FALSE))
MRT
# length(which(st_is_valid(rail_network_sf) == FALSE))
It got invalid geometry and here is the code to make it valid:
# # Identify the invalid geometry
# library(leaflet)
# invalid_geom <- which(!st_is_valid(rail_network_sf))
#
# # Use st_cast() to convert the invalid geometry to a valid geometry type
# rail_network_sf <- st_cast(rail_network_sf, "MULTILINESTRING")
#
# # Use shiny to manually edit the geometry
# library(shiny)
# shinyApp(
# ui = fluidPage(
# leafletOutput("map")
# ),
# server = function(input, output) {
# output$map <- renderLeaflet({
# leaflet(rail_network_sf) %>%
# addTiles() %>%
# addPolylines()
# })
# observe({
# length(which(st_is_valid(rail_network_sf) == FALSE))
# length(which(st_is_valid(childcaresf) == FALSE))
ElderlyCare
# length(which(st_is_valid(Eldercare_sf) == FALSE))
# length(which(st_is_valid(cbd_sf)))
CBD
# cbd_sf <- st_make_valid(cbd_sf)
# length(which(st_is_valid(cbd_sf) == FALSE))
Childcare
# length(which(st_is_valid(childcaresf) == FALSE))
Kindergartens
# length(which(st_is_valid(Kindergartens_sf) == FALSE))
Malls
# length(which(st_is_valid(malls_sf) == FALSE))
Parks
# length(which(st_is_valid(Parks_sf) == FALSE))
Primary school
# length(which(st_is_valid(prisch_sf) == FALSE))
Top primary school
# length(which(st_is_valid(top10prisch_sf) == FALSE))
Calculate Proximity
# library(units)
# proximity <- function(df1, df2, varname) {
# dist_matrix <- st_distance(df1, df2) %>%
# drop_units()
# df1[,varname] <- rowMins(dist_matrix)
# return(df1)
# }
# install.packages("matrixStats")
# library(matrixStats)
# proximity <- function(df1, df2, varname) {
# dist_matrix <- st_distance(df1, df2) %>%
# drop_units()
# df1[,varname] <- rowMins(dist_matrix)
# return(df1)
# }
# resale.sf <-
# proximity(resale.sf, cbd_sf, "PROX_CBD") %>%
# proximity(., childcaresf, "PROX_CHILDCARE") %>%
# proximity(., Eldercare_sf, "PROX_ELDERCARE") %>%
# proximity(., hawker_sf, "PROX_HAWKER") %>%
# proximity(., rail_network_sf, "PROX_MRT") %>%
# proximity(., Parks_sf, "PROX_PARK") %>%
# proximity(., top10prisch_sf, "PROX_TOPPRISCH") %>%
# proximity(., malls_sf, "PROX_MALL") %>%
# proximity(., supermkt_sf, "PROX_SPRMKT")
# num_radius <- function(df1, df2, varname, radius) {
# dist_matrix <- st_distance(df1, df2) %>%
# drop_units() %>%
# as.data.frame()
# df1[,varname] <- rowSums(dist_matrix <= radius)
# return(df1)
# }
# resale.sf <-
# num_radius(resale.sf, Kindergartens_sf, "NUM_KNDRGTN", 350) %>%
# num_radius(., childcaresf, "NUM_CHILDCARE", 350) %>%
# num_radius(., bus_sf, "NUM_BUS_STOP", 350) %>%
# num_radius(., prisch_sf, "NUM_PriSch", 1000)
# resale.sf <- resale.sf %>%
# mutate() %>%
# rename("AREA_SQM" = "floor_area_sqm",
# "LEASE_YRS" = "remaining_lease",
# "PRICE"= "resale_price") %>%
# relocate(`PRICE`)
# st_write(resale.sf, "data/geospatial/resale-final_cleaned.shp")
# Read back the fully-prepared resale shapefile (structural + locational
# variables) written by the preceding commented-out pipeline.
resale.sf <- st_read(dsn = "data/geospatial", layer = "resale-final_cleaned")
Reading layer `resale-final_cleaned' from data source
`C:\Quanfang777\IS415-GAA\Takehome_Exercise\Takehome_Exercise3\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 23656 features and 24 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 11519.79 ymin: 28217.39 xmax: 42645.18 ymax: 48741.06
Projected CRS: SVY21 / Singapore TM
# ESRI shapefiles truncate field names to about 7 characters on write;
# restore the full variable names and move PRICE (the response) to the
# front for readability.
resale.sf <- resale.sf %>%
  rename("AREA_SQM"       = "AREA_SQ",
         "LEASE_YRS"      = "LEASE_Y",
         "PROX_CBD"       = "PROX_CB",
         "PROX_CHILDCARE" = "PROX_CH",
         "PROX_ELDERCARE" = "PROX_EL",
         "PROX_HAWKER"    = "PROX_HA",
         "PROX_PARK"      = "PROX_PA",
         "PROX_MRT"       = "PROX_MR",
         "PROX_SPRMKT"    = "PROX_SP",
         "PROX_MALL"      = "PROX_MA",
         "NUM_KNDRGTN"    = "NUM_KND",
         "NUM_CHILDCARE"  = "NUM_CHI",
         "NUM_BUS_STOP"   = "NUM_BUS",
         "PROX_TOPPRISCH" = "PROX_TO") %>%
  relocate(`PRICE`)
Drawing Statistical Point Map
# Switch tmap to interactive (leaflet-based) viewing mode.
tmap_mode("view")
tmap mode set to interactive viewing
# Let tmap auto-repair any invalid geometries encountered while plotting.
tmap_options(check.and.fix = TRUE)
# Interactive dot map of resale PRICE, classed by quantiles, with zoom
# limited to levels 11-14 to keep the view at a sensible city scale.
tm_shape(resale.sf) +
tm_dots(col = "PRICE",
alpha = 0.6,
style="quantile") +
tm_view(set.zoom.limits = c(11,14))