If you have a point layer with many attributes and objects stored in a .csv file, it is a good idea to use the multi-threaded loading available in `data.table::fread()` or `vroom::vroom()`. This is much faster than loading a GeoPackage or a .csv file through GDAL.
library("sf")
library("data.table") # library("vroom")
# Simulate a point data set ----
# Fixed seed so the simulated table is the same on every run.
set.seed(1)
n <- 500000

# One column per storage type; the column expressions are evaluated in the
# order written, so the random draws happen in the sequence x, y, col_1..col_4.
df <- data.frame(
  x = rnorm(n),
  y = rnorm(n),
  col_1 = sample(c(TRUE, FALSE), n, replace = TRUE), # logical
  col_2 = sample(letters, n, replace = TRUE),        # character
  col_3 = runif(n),                                  # double
  col_4 = sample(1:100, n, replace = TRUE)           # integer
)

# Turn the x/y columns into point geometries (no CRS is passed here).
sf <- st_as_sf(df, coords = c("x", "y"))

# Write the benchmark inputs to temporary files ----
# The plain table goes to .csv, the spatial layer to .gpkg.
csv <- tempfile(fileext = ".csv")
gpkg <- tempfile(fileext = ".gpkg")
write.csv(df, file = csv, row.names = FALSE)
write_sf(sf, dsn = gpkg)
## sf + geopackage
# Read the GeoPackage layer with sf and time the call; the result is kept
# in `t1`.
system.time(t1 <- read_sf(gpkg))
## user system elapsed
## 1.65 0.06 1.73
## sf + csv
# Read the .csv with sf and time the call; the result is kept in `t2`.
# The open options ask the driver to detect column types, name the x/y
# columns to build point geometry from, and request that those source
# columns not be kept.
system.time(
  t2 <- read_sf(
    csv,
    options = c(
      "AUTODETECT_TYPE=YES",
      "X_POSSIBLE_NAMES=x",
      "Y_POSSIBLE_NAMES=y",
      "KEEP_GEOM_COLUMNS=NO"
    )
  )
)
## user system elapsed
## 3.84 2.39 6.30
## data.table::fread + csv
# Parse the .csv with fread() on 4 threads, then convert the table to an sf
# point layer from its x/y columns. Both steps are inside the timed
# expression; the result is kept in `t3`.
system.time(
  t3 <- st_as_sf(fread(csv, nThread = 4), coords = c("x", "y"))
)
## user system elapsed
## 0.21 0.03 0.14