library(tidyverse)
library(sf)
= st_read("data/Lisbon/BGRI2021_1106.gpkg") # census units in polygons
census plot(census$geom)
# from polygons to points
= census |>
census_poitns st_transform(4326) |> # make sue it is in universal CRS
st_centroid()
plot(census_poitns) # census units in points
names(census_poitns)
= census_poitns |>
census_poitns select(BGRI2021, N_INDIVIDUOS) |> # only my code area and population
rename(id = BGRI2021,
residents = N_INDIVIDUOS) # rename variables
st_write(census_poitns, "data/census_poitns.gpkg") # save for later
POIs and Grids
Census data
Search and download your census data from official sources.
In Portugal: https://mapas.ine.pt/download/index2021.phtml
For this exercises, you just need the population associated with a statistical unit, as smaller as possible
Code
library(mapview)
mapview(census_poitns) + mapview(census)
Dissolve - City limit
In the case you don’t have already a city limit, you can create one based on the “dissolve” of the inner boundaries of your census data, as follows
#dissolve
= census |> st_union() |> st_as_sf()
city_limit plot(city_limit)
If you don’t have census data at a reasonable level (census blocks), you can recreate random points in your city. See the code bellow 🔎
Generate synthetic census-like points for a City
# 0. Load libraries
library(sf)
library(dplyr)
# 1. Read city polygon (replace with your file path)
<- st_read("data/city_limit.gpkg") |> st_transform(4326)
city_limit
# 2. Set parameters
set.seed(42) # for reproducibility
<- 545000 # your city population
total_pop <- 300 # a census block average
avg_residents <- round(total_pop / avg_residents)
n_points
# 3. Generate random points within the city boundary
<- st_sample(city_limit, size = n_points, type = "random") |>
census_syn st_as_sf() |>
mutate(id = 1:n_points)
# # 4. Compute distance from city center (for density weighting) - OPTIONAL
# center <- st_centroid(city_limit)
# dist_center <- as.numeric(st_distance(census_syn, center)) # in meters
#
# # 5. Create weights so closer points get higher population - OPTIONAL
# # Inverse distance weighting (add +1 to avoid division by zero)
# weights <- 1 / (dist_center + 1)
# assign random populations that sum to total_pop
<- runif(n_points)
weights
# Normalize weights to sum to 1 and assign residents
<- round(weights / sum(weights) * total_pop)
residents
# 6. Adjust rounding so the total sums exactly to total_pop
<- total_pop - sum(residents)
diff if (diff != 0) {
1:abs(diff)] <- residents[1:abs(diff)] + sign(diff)
residents[
}
# 7. Add residents to points
$residents <- residents
census_synsum(census_syn$residents) # should be total_pop
# 8. Quick visualization
::mapview(census_syn, zcol = "residents")
mapview
# 9. Save for later
st_write(census_syn, "data/synthetic_census_points.gpkg", delete_dsn = TRUE)
Points of Interest
OpenStreetMap
From OSM we can export several different opportunities to use on routing and accessibility exercises.
For instance, the 15-min city idea follows the Flowers of Proximity, to categorize the opportinities in 7 different categories, based on Büttner et al. (2022).
For the Streets4All project, and SiteSelection package (Rosa Félix and Gabriel Valença 2024), we divided in:
- Amenities
- Healthcare
- Leisure
- Shop
- Sport
- Tourism
Hot export tool
Using the HOT Export Tool, you can select a few categories, and export as .gpkg
or .geojson
.
For instance: schools, hospitals, ATMs, parks, supermarkets, and restaurants.
Your further analysis will be made by comparing the categories you selected and exported.
Dealing with points and polygons from OSM
If your export brings both polygons and points, you should convert the polygons to points (centroids).
= st_read("data/pois.gpkg") # downloaded from HOT export
pois_raw
= pois_raw |>
pois_cat mutate(type = case_when(
!is.na(shop) & shop %in% c("supermarket", "convenience") ~ "grocery",
== "marketplace" ~ "grocery",
amenity !is.na(leisure) ~ leisure,
%in% c("restaurant", "pub", "cafe", "bar", "fast_food") ~ "restaurant",
amenity TRUE ~ amenity
|>
)) select(osm_id, type, name)
table(pois_cat$type)
atm grocery hospital park restaurant school
316 799 42 503 4956 445
# separate points form polygons
= pois_cat |> st_collection_extract("POINT")
pois_points = pois_cat |> st_collection_extract("POLYGON")
pois_polygons
# transform polygons in points
= st_centroid(pois_polygons)
pois_polygons
# bind them
= bind_rows(pois_points, pois_polygons)
pois
mapview(pois, zcol = "type")
Geometric filter
To keep only the POIs relative to your city limit, you can make a geometrical filter, as:
= pois[city_limit,]
pois
# export for later
st_write(pois, "data/Lisbon/pois_osm_points.gpkg", delete_dsn = TRUE)
Script
A more flexible approach to download the data, but requires more codding 🧑💻
See the script we did for all Portugal, and the dataset for Lisbon (2024): data_extract.R (line 146)
You can explore other OpenStreetMap categories at their Wiki.
Grids
You may want to use the original census units, or create a squared oh hexagonal grid.
- Create a grid
- Match all the Census and POIs to that grid
Create a grid
We will use the city of Lisbon as example
library(sf)
library(tidyverse)
library(mapview)
= st_read("data/Lisbon/Lisboa_lim.gpkg")
city_limit plot(city_limit)
Hexagonal using h3jsr
Make an hexagonal grid that covers the city polygon, using the universal H3 grid.
library(h3jsr)
# Resolution: https://h3geo.org/docs/core-library/restable/
# h3_res = 10 # 150m diameter
= 9 # 400m diameter
h3_res # h3_res = 8 # 1060m diameter
= city_limit |>
GRID_h3 polygon_to_cells(res = h3_res, simple = FALSE)
= GRID_h3$h3_addresses |>
GRID_h3 cell_to_polygon(simple = FALSE)
= GRID_h3 |>
GRID_h3 mutate(id = seq(1:nrow(GRID_h3))) # give an ID to each cell
= GRID_h3 |> st_drop_geometry() # save h3_address for later
h3_index
mapview(GRID_h3)
Squared
Make a squared grid that covers the city polygon
= city_limit |>
GRID st_transform(crs = 3857) |> # to a projected crs
st_make_grid(cellsize = 400, # meters, we are using a projected crs
what = "polygons",
square = TRUE) |> # if FALSE, hexagons
st_sf() |> # from list to sf
st_transform(crs = 4326) |> # back to WGS84
st_intersection(city_limit$geom) # crop (optional)
= GRID |>
GRID rename(geometry = st_make_grid.st_transform.city_limit..crs...3857...cellsize...400..) |>
mutate(id = c(1:nrow(GRID))) # just to give an ID to each cell
mapview(GRID, alpha.regions = 0.2)
You should stick with just one of them: squares or hexagons.
Areas with information
Now, we can associate these areas (and their centroids) with all the information we want.
With POIs
# Points of interest from script
= st_read("https://github.com/U-Shift/Traffic-Simulation-Models/releases/download/2025/pois_osm_points.gpkg") POIs
Code for squares
# count points by type in areas
= POIs |> st_join(GRID, join = st_within)
GRID_pois
= GRID_pois |>
counts_type st_drop_geometry() |> # drop geometry for counting
group_by(id, type) |>
summarise(n = n(), .groups = "drop") |>
::pivot_wider(names_from = type, values_from = n, values_fill = 0)
tidyr
= GRID |>
GRID_pois left_join(counts_type, by = "id") |>
mutate(across(where(is.numeric), ~replace_na(.x, 0)))
mapview(GRID_pois, zcol = "restaurant") # for instance
The result is a grid with too many columns (one for each type). Consider using only the group.
With Census
# Census data
= st_read("https://github.com/U-Shift/Traffic-Simulation-Models/releases/download/2025/Censos_Lx.gpkg")
census
# count points by type in areas
= census |> st_join(GRID, join = st_within)
GRID_census
= GRID_census |>
census_density st_drop_geometry() |> # drop geometry for counting
group_by(id.y) |>
summarise(buildings = sum(buildings),
families = sum(families),
residents = sum(residents)) |>
rename(id = id.y) |>
filter(!is.na(id))
= GRID |>
GRID_census left_join(census_density, by = "id") |>
mutate(across(where(is.numeric), ~replace_na(.x, 0)))
mapview(GRID_census, zcol = "residents")
All together
= GRID_census |> left_join(GRID_pois |> st_drop_geometry()) GRID_data
The same for Hex grid
Code for Hexagons
# POIs
= POIs |> st_join(GRID_h3, join = st_within)
GRIDhex_pois
= GRIDhex_pois |>
counts_type st_drop_geometry() |> # drop geometry for counting
group_by(id, type) |>
summarise(n = n(), .groups = "drop") |>
::pivot_wider(names_from = type, values_from = n, values_fill = 0)
tidyr
= GRID_h3 |>
GRIDhex_pois left_join(counts_type, by = "id") |>
mutate(across(where(is.numeric), ~replace_na(.x, 0)))
# Census
= census |> st_join(GRID_h3, join = st_within)
GRIDhex_census
= GRIDhex_census |>
census_density st_drop_geometry() |> # drop geometry for counting
group_by(id.y) |>
summarise(buildings = sum(buildings),
families = sum(families),
residents = sum(residents)) |>
rename(id = id.y) |>
filter(!is.na(id))
= GRID_h3 |>
GRIDhex_census left_join(census_density, by = "id") |>
mutate(across(where(is.numeric), ~replace_na(.x, 0)))
# All together now
= GRIDhex_census |> left_join(GRIDhex_pois |> st_drop_geometry())
GRIDhex_data
mapview(GRIDhex_data, zcol = "residents")
Centroid coordinates
We may want to have this information as points, for instance for routing to opportunities.
# hexagons
= GRIDhex_data |>
POINTShex st_centroid()
= GRIDhex_data |>
GRIDhex_data mutate(lon = st_coordinates(POINTShex)[,1],
lat = st_coordinates(POINTShex)[,2])
# squares
= GRID_data |>
POINTSsq st_centroid()
= GRID_data |>
GRID_data mutate(lon = st_coordinates(POINTSsq)[,1],
lat = st_coordinates(POINTSsq)[,2])
Don’t forget to save your data, this will be very useful for the next exercises.
saveRDS(GRID_data, "data/Lisbon/GRIDsq_data.rds")
saveRDS(GRIDhex_data, "data/Lisbon/GRIDhex_data.rds")