You can use SQL to pre-filter features in {sf} and {terra}, so that only the
selected objects/columns are loaded. You can also restrict loading to a given
spatial extent of the layer. In addition, in {terra} you can limit the loading
to the attributes only or to the geometry only.
library("sf")
library("terra")
# Build a reproducible synthetic point dataset: `n` random locations with one
# attribute column per basic type, so the effect of column/row filtering on
# read time can be measured below.
set.seed(1)
n = 500000
df = data.frame(
  x = rnorm(n),
  y = rnorm(n),
  col_1 = sample(c(TRUE, FALSE), n, replace = TRUE), # logical
  col_2 = sample(letters, n, replace = TRUE),        # character
  col_3 = runif(n),                                  # double
  col_4 = sample(1:100, n, replace = TRUE)           # integer
)

# Turn the x/y columns into point geometries, declared as EPSG:4326.
df = st_as_sf(df, coords = c("x", "y"), crs = 4326)

# Write the points as layer "test" of a GeoPackage in the session temp dir;
# every benchmark below reads from this file.
gpkg = tempfile(fileext = ".gpkg")
write_sf(df, dsn = gpkg, layer = "test")
{sf}
## default: read the whole layer (all rows, all columns)
system.time({
  x = read_sf(dsn = gpkg)
})
## user system elapsed
## 1.67 0.06 1.73

## select columns: the SQL query keeps only the geometry and `col_2`
sql = "SELECT geom, col_2 FROM test"
system.time({
  x = read_sf(dsn = gpkg, query = sql)
})
## user system elapsed
## 1.29 0.11 1.41

## select rows: the SQL query keeps only features whose `col_2` equals 'a'
sql = "SELECT * FROM test WHERE col_2 = 'a'"
system.time({
  x = read_sf(dsn = gpkg, query = sql)
})
## user system elapsed
## 0.08 0.03 0.11

## select area: pass a bounding box (as WKT text) as a spatial filter
## slightly slower but memory allocation is smaller
bbox = st_as_text(st_as_sfc(st_bbox(
  c(xmin = -1, ymin = -1, xmax = 1, ymax = 1),
  crs = st_crs(4326)
)))
system.time({
  x = read_sf(dsn = gpkg, wkt_filter = bbox)
})
## user system elapsed
## 0.91 0.60 1.52
{terra}
## default: read geometries together with their attributes
system.time({
  y = vect(gpkg)
})
## user system elapsed
## 1.00 0.09 1.12

## read only attributes: geometries are skipped
## (per the terra documentation the result is then a data.frame)
system.time({
  y = vect(gpkg, what = "attributes")
})
## user system elapsed
## 0.33 0.05 0.37

## read only geometry: the attribute table is skipped
system.time({
  y = vect(gpkg, what = "geoms")
})
## user system elapsed
## 0.76 0.00 0.76