From 8d1432ba536937b8f995f2365d89a3d40efdaa4c Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 15:24:30 -0700 Subject: [PATCH 01/13] split tests for rast and vect --- tests/testthat/_snaps/tar-terra-rast.md | 15 ++++ ...test-tar-terra.R => test-tar-terra-rast.R} | 71 ------------------ tests/testthat/test-tar-terra-vect.R | 73 +++++++++++++++++++ 3 files changed, 88 insertions(+), 71 deletions(-) create mode 100644 tests/testthat/_snaps/tar-terra-rast.md rename tests/testthat/{test-tar-terra.R => test-tar-terra-rast.R} (62%) create mode 100644 tests/testthat/test-tar-terra-vect.R diff --git a/tests/testthat/_snaps/tar-terra-rast.md b/tests/testthat/_snaps/tar-terra-rast.md new file mode 100644 index 0000000..0b2a483 --- /dev/null +++ b/tests/testthat/_snaps/tar-terra-rast.md @@ -0,0 +1,15 @@ +# tar_terra_rast() works + + Code + x + Output + class : SpatRaster + dimensions : 90, 95, 1 (nrow, ncol, nlyr) + resolution : 0.008333333, 0.008333333 (x, y) + extent : 5.741667, 6.533333, 49.44167, 50.19167 (xmin, xmax, ymin, ymax) + coord. ref. : lon/lat WGS 84 (EPSG:4326) + source : test_terra_rast + name : elevation + min value : 141 + max value : 547 + diff --git a/tests/testthat/test-tar-terra.R b/tests/testthat/test-tar-terra-rast.R similarity index 62% rename from tests/testthat/test-tar-terra.R rename to tests/testthat/test-tar-terra-rast.R index 136a16b..0cb8e64 100644 --- a/tests/testthat/test-tar-terra.R +++ b/tests/testthat/test-tar-terra-rast.R @@ -57,78 +57,7 @@ targets::tar_test("tar_terra_rast() works with dynamic branching", { targets::tar_make() expect_length(targets::tar_read(my_map_plus), 2) }) -targets::tar_test("tar_terra_vect() works", { - targets::tar_script({ - lux_area <- function(projection = "EPSG:4326") { - terra::project( - terra::vect(system.file("ex", "lux.shp", - package = "terra" - )), - projection - ) - } - list( - geotargets::tar_terra_vect( - test_terra_vect, - lux_area() - ), - geotargets::tar_terra_vect( - test_terra_vect_shz, - lux_area(), - filetype = "ESRI Shapefile" - ) - ) - }) - targets::tar_make() - x <- targets::tar_read(test_terra_vect) - y <- targets::tar_read(test_terra_vect_shz) - expect_s4_class(x, "SpatVector") - expect_s4_class(y, "SpatVector") - expect_snapshot(x) - expect_snapshot(y) - expect_equal(terra::values(x), terra::values(y)) -}) - -targets::tar_test("tar_terra_vect() works with multiple workers (tests marshaling/unmarshaling)", { - targets::tar_script({ - targets::tar_option_set(controller = crew::crew_controller_local(workers = 2)) - list( - geotargets::tar_terra_vect( - vect1, - terra::vect(system.file("ex", "lux.shp", package = "terra")) - ), - geotargets::tar_terra_vect( - vect2, - terra::vect(system.file("ex", "lux.shp", package = "terra")) - ) - ) - }) - targets::tar_make() - expect_true(all(is.na(targets::tar_meta()$error))) - expect_s4_class(targets::tar_read(vect1), "SpatVector") -}) -targets::tar_test("tar_terra_vect() works with dynamic branching", { - targets::tar_script({ - list( - geotargets::tar_terra_vect( - my_vect, - terra::vect(system.file("ex", "lux.shp",package = "terra")) - ), - targets::tar_target( - to_sub, - c("Clervaux", "Redange") - ), - geotargets::tar_terra_vect( - my_vect_subs, - my_vect[my_vect$NAME_2 == to_sub], - pattern = to_sub - ) - ) - }) - targets::tar_make() - expect_length(targets::tar_read(my_vect_subs), 2) -}) targets::tar_test("user resources are passed correctly", { library(crew) diff --git a/tests/testthat/test-tar-terra-vect.R b/tests/testthat/test-tar-terra-vect.R new file mode 100644 index 0000000..52b05cf --- /dev/null +++ b/tests/testthat/test-tar-terra-vect.R @@ -0,0 +1,73 @@ +targets::tar_test("tar_terra_vect() works", { + targets::tar_script({ + lux_area <- function(projection = "EPSG:4326") { + terra::project( + terra::vect(system.file("ex", "lux.shp", + package = "terra" + )), + projection + ) + } + list( + geotargets::tar_terra_vect( + test_terra_vect, + lux_area() + ), + geotargets::tar_terra_vect( + test_terra_vect_shz, + lux_area(), + filetype = "ESRI Shapefile" + ) + ) + }) + targets::tar_make() + x <- targets::tar_read(test_terra_vect) + y <- targets::tar_read(test_terra_vect_shz) + expect_s4_class(x, "SpatVector") + expect_s4_class(y, "SpatVector") + expect_snapshot(x) + expect_snapshot(y) + expect_equal(terra::values(x), terra::values(y)) +}) + + +targets::tar_test("tar_terra_vect() works with dynamic branching", { + targets::tar_script({ + list( + geotargets::tar_terra_vect( + my_vect, + terra::vect(system.file("ex", "lux.shp",package = "terra")) + ), + targets::tar_target( + to_sub, + c("Clervaux", "Redange") + ), + geotargets::tar_terra_vect( + my_vect_subs, + my_vect[my_vect$NAME_2 == to_sub], + pattern = to_sub + ) + ) + }) + targets::tar_make() + expect_length(targets::tar_read(my_vect_subs), 2) +}) + +targets::tar_test("tar_terra_vect() works with multiple workers (tests marshaling/unmarshaling)", { + targets::tar_script({ + targets::tar_option_set(controller = crew::crew_controller_local(workers = 2)) + list( + geotargets::tar_terra_vect( + vect1, + terra::vect(system.file("ex", "lux.shp", package = "terra")) + ), + geotargets::tar_terra_vect( + vect2, + terra::vect(system.file("ex", "lux.shp", package = "terra")) + ) + ) + }) + targets::tar_make() + expect_true(all(is.na(targets::tar_meta()$error))) + expect_s4_class(targets::tar_read(vect1), "SpatVector") +}) From abf994a819a6e80f9c344b5c5426c66f133492af Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 15:30:45 -0700 Subject: [PATCH 02/13] add test for metadata --- tests/testthat/test-tar-terra-rast.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test-tar-terra-rast.R b/tests/testthat/test-tar-terra-rast.R index 0cb8e64..b9d7109 100644 --- a/tests/testthat/test-tar-terra-rast.R +++ b/tests/testthat/test-tar-terra-rast.R @@ -126,3 +126,26 @@ tar_test("That changing filetype invalidates a target", { }) expect_equal(tar_outdated(), "r") }) + +tar_test("metadata is maintained", { + tar_script({ + library(targets) + library(geotargets) + library(terra) + make_rast <- function() { + f <- system.file("ex/elev.tif", package="terra") + r <- terra::rast(f) + r <- c(r, r + 10, r/2) + terra::units(r) <- rep("m", 3) + terra::time(r) <- as.Date("2024-10-01") + c(0,1,2) + r + } + list( + tar_terra_rast(r, make_rast(), preserve_metadata = TRUE) + ) + }) + tar_make() + x <- tar_read(r) + expect_equal(terra::units(x), "m") + expect_equal(terra::time(x), as.Date("2024-10-01") + c(0,1,2)) +}) From a2749acfc3ee04c36532b782fd2a801765bead5b Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 15:56:15 -0700 Subject: [PATCH 03/13] fix test --- tests/testthat/test-tar-terra-rast.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-tar-terra-rast.R b/tests/testthat/test-tar-terra-rast.R index b9d7109..85590a6 100644 --- a/tests/testthat/test-tar-terra-rast.R +++ b/tests/testthat/test-tar-terra-rast.R @@ -146,6 +146,6 @@ tar_test("metadata is maintained", { }) tar_make() x <- tar_read(r) - expect_equal(terra::units(x), "m") + expect_equal(terra::units(x), rep("m", 3)) expect_equal(terra::time(x), as.Date("2024-10-01") + c(0,1,2)) }) From 10a51719eceef433817b3de4a1814e1fb4a60e45 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 16:00:56 -0700 Subject: [PATCH 04/13] implement preserve_metadata argument --- DESCRIPTION | 3 +- R/tar-terra-rast.R | 71 ++++++++++++++++++++++++++++++++++++------- man/tar_terra_rast.Rd | 8 +++++ 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 63a6b20..bc9c62d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,8 @@ Suggests: stars, terra (>= 1.7.71), testthat (>= 3.0.0), - withr (>= 3.0.0) + withr (>= 3.0.0), + zip Config/testthat/edition: 3 URL: https://github.com/njtierney/geotargets, https://njtierney.github.io/geotargets/ BugReports: https://github.com/njtierney/geotargets/issues diff --git a/R/tar-terra-rast.R b/R/tar-terra-rast.R index 337c1b8..265c87e 100644 --- a/R/tar-terra-rast.R +++ b/R/tar-terra-rast.R @@ -6,6 +6,12 @@ #' to [terra::writeRaster()] #' @param gdal character. GDAL driver specific datasource creation options #' passed to [terra::writeRaster()] +#' @param preserve_metadata logical. When `FALSE` (default), any auxiliary files +#' that would be written by [terra::writeRaster()] containing raster metadata +#' such as units and datetimes are lost (note that this does not include layer +#' names set with `names() <-`). When `TRUE`, these metadata are retained by +#' archiving all written files as a zip file upon writing and unzipping them +#' upon reading. This adds extra overhead and will slow pipelines. #' @param ... Additional arguments not yet used #' #' @inheritParams targets::tar_target @@ -38,6 +44,7 @@ tar_terra_rast <- function(name, pattern = NULL, filetype = geotargets_option_get("gdal.raster.driver"), gdal = geotargets_option_get("gdal.raster.creation.options"), + preserve_metadata = FALSE, #TODO: add a geotargets option for this ..., tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), @@ -90,19 +97,11 @@ tar_terra_rast <- function(name, packages = packages, library = library, format = targets::tar_format( - read = function(path) terra::rast(path), - write = function(object, path) { - terra::writeRaster( - object, - path, - filetype = filetype, - overwrite = TRUE, - gdal = gdal - ) - }, + read = tar_rast_read(preserve_metadata = preserve_metadata), + write = tar_rast_write(filetype = filetype, gdal = gdal, preserve_metadata = preserve_metadata), marshal = function(object) terra::wrap(object), unmarshal = function(object) terra::unwrap(object), - substitute = list(filetype = filetype, gdal = gdal) + substitute = list(filetype = filetype, gdal = gdal, preserve_metadata = preserve_metadata) ), repository = repository, iteration = "list", #only "list" works right now @@ -118,3 +117,53 @@ tar_terra_rast <- function(name, description = description ) } + +tar_rast_read <- function(preserve_metadata) { + if (isTRUE(preserve_metadata)) { + function(path) { + tmp <- withr::local_tempdir() + zip::unzip(zipfile = path, exdir = tmp) + terra::rast(file.path(tmp, basename(path))) + } + } else { + function(path) terra::rast(path) + } +} + +tar_rast_write <- function(filetype, gdal, preserve_metadata) { + if (isTRUE(preserve_metadata)) { + function(object, path) { + #write the raster in a fresh local tempdir() that disappears when function is done + tmp <- withr::local_tempdir() + dir.create(file.path(tmp, dirname(path)), recursive = TRUE) + terra::writeRaster( + object, + file.path(tmp, path), + filetype = filetype, + overwrite = TRUE, + gdal = gdal + ) + #package files into a zip file using `zip::zip()` + raster_files <- list.files(file.path(tmp, dirname(path)), full.names = TRUE) + zip::zip( + file.path(tmp, basename(path)), + files = raster_files, + compression_level = 1, + mode = "cherry-pick", + root = dirname(raster_files)[1] + ) + #move the zip file to the expected place + file.rename(file.path(tmp, basename(path)), path) + } + } else { + function(object, path) { + terra::writeRaster( + object, + path, + filetype = filetype, + overwrite = TRUE, + gdal = gdal + ) + } + } +} diff --git a/man/tar_terra_rast.Rd b/man/tar_terra_rast.Rd index 198fb1b..1cdf271 100644 --- a/man/tar_terra_rast.Rd +++ b/man/tar_terra_rast.Rd @@ -10,6 +10,7 @@ tar_terra_rast( pattern = NULL, filetype = geotargets_option_get("gdal.raster.driver"), gdal = geotargets_option_get("gdal.raster.creation.options"), + preserve_metadata = FALSE, ..., tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), @@ -73,6 +74,13 @@ to \code{\link[terra:writeRaster]{terra::writeRaster()}}} \item{gdal}{character. GDAL driver specific datasource creation options passed to \code{\link[terra:writeRaster]{terra::writeRaster()}}} +\item{preserve_metadata}{logical. When \code{FALSE} (default), any auxiliary files +that would be written by \code{\link[terra:writeRaster]{terra::writeRaster()}} containing raster metadata +such as units and datetimes are lost (note that this does not include layer +names set with \verb{names() <-}). When \code{TRUE}, these metadata are retained by +archiving all written files as a zip file upon writing and unzipping them +upon reading. This adds extra overhead and will slow pipelines.} + \item{...}{Additional arguments not yet used} \item{tidy_eval}{Logical, whether to enable tidy evaluation From 89e8025282993c9e405fc2dc9642ae0cb43a32d3 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 16:17:39 -0700 Subject: [PATCH 05/13] add notes about choice to go with zip archive --- .Rbuildignore | 1 + notes/preserve_metadata_benchmarking.Qmd | 230 +++++++++++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 notes/preserve_metadata_benchmarking.Qmd diff --git a/.Rbuildignore b/.Rbuildignore index e8e6149..877205b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,4 @@ ^CITATION\.cff$ ^logo$ +^notes$ diff --git a/notes/preserve_metadata_benchmarking.Qmd b/notes/preserve_metadata_benchmarking.Qmd new file mode 100644 index 0000000..b5d7223 --- /dev/null +++ b/notes/preserve_metadata_benchmarking.Qmd @@ -0,0 +1,230 @@ +--- +title: Benchmarking options for preserving metadata in `geotargets::tar_terra_rast()` +author: Eric Scott +--- + +```{r} +#for defining read and write functions +library(terra) +library(fs) +library(zip) +library(withr) + +#for benchmarking and profiling +library(microbenchmark) +library(ggplot2) +library(profvis) +``` + +## The problem + +`terra` saves metadata in "sidecar" files but `targets` enforces a 'one target = one file' rule. +There are two ways to get around this: 1) the target is a generic archive containing both the raster and the sidecar file, 1b) the target is a SOzip geospatial archive created by GDAL, or 2) the target is a `PackedSpatRaster` object pointing to the raster and sidecar file saved elsewhere. + +## Example raster + +```{r} +f <- system.file("ex/elev.tif", package="terra") +r <- rast(f) +r <- c(r, r + 10, r/2) +units(r) <- "m" +names(r) <- c("elevation", "elevation+10", "elevation/2") +r +#higher res version for benchmarking +r_highres <- disagg(r, 10) +``` + +```{r} +#"pretend" _targets/ store +target_store <- "_test/object" +dir_create(target_store) +``` + +## Using an archive + +Getting the archive to not contain a bunch of junk paths is tricky (impossible?) with base R's `zip()` or `tar()` functions. +Plus, they have potentially different behaviors on different operating systems. +Fortunately, `zip::zip()` includes a helpful "cherry-pick" mode. + +```{r} +write_to_zip <- function(object, path) { + #rename path to not be confused with fs::path() just to make more readable + out_path <- path + dir_create(path_dir(out_path)) + #do stuff in a fresh local tempdir() that disappears when function is done + tmp <- withr::local_tempdir() + dir_create(tmp, fs::path_dir(out_path)) + #write the raster (hard-coded options for demonstration) + writeRaster(object, + fs::path(tmp, out_path), + filetype = "GTiff", + overwrite = TRUE) + #figure out which files got written + raster_files <- dir_ls(path(tmp, path_dir(out_path))) + #package those into a zip file using `zip::zip()` + zip::zip( + path(tmp, fs::path_file(out_path)), + files = raster_files, + compression_level = 1, + mode = "cherry-pick", + root = fs::path_dir(raster_files) + ) + #move the zip file to the out_path as expected output + file_move(path(tmp, fs::path_file(out_path)), out_path) +} + +read_from_zip <- function(path) { + tmp <- local_tempdir() + #extract into tempdir + zip::unzip(zipfile = path, exdir = tmp) + #read in as rast + rast(fs::path(tmp, fs::path_file(path))) +} +``` + +Testing: + +```{r} +write_to_zip(r, path(target_store, "out1")) +read_from_zip(path(target_store, "out1")) +``` + +A faster alternative *might* be to read/write the file using /vsizip/, but this seems to lose the metadata. +Also, `overwrite = TRUE` doesn't seem to work (). + +```{r} +write_to_sozip <- function(object, path) { + path2 <- paste0("/vsizip/{", path, "}/", basename(path)) + writeRaster(object, + path2, + filetype = "GTiff", + gdal = c("STREAMABLE_OUTPUT=YES", "COMPRESS=NONE"), + overwrite = TRUE) +} +read_vsizip <- function(path) { + rast(paste0("/vsizip/{", path, "}/", path_file(path))) +} +``` + +```{r} +write_to_sozip(object = r, path = path(target_store, "out2")) +read_vsizip(path(target_store, "out2")) |> units() +#even if unzipped, the metadata is missing +read_from_zip(path(target_store, "out2")) |> units() +#because overwite doesn't work: +file_delete(path(target_store, "out2")) +``` + +`read_vsizip()` strips metadata even when used on a "manually" created zip archive as well + +```{r} +read_vsizip(path(target_store, "out1")) |> units() +``` + +## Using cache + +This method uses `terra::wrapCache()` which saves files to a directory, but returns a `PackedSpatRaster` object that could be saved as a .rds (or other) file in the targets store. + +::: callout-important +This method *probably* won't work with some distributed workers unless they have access to the `"_getoargets/"` cache! +::: + +```{r} +cache <- "_geotargets" +dir_create(cache) +``` + +```{r} +write_to_cache <- function(object, path) { + saveRDS( + terra::wrapCache( + object, + filename = fs::path(cache, path_file(path)), + filetype = "GTiff", + overwrite = TRUE), + file = path + ) +} + +read_from_cache <- function(path) { + terra::unwrap(readRDS(path)) +} +``` + +```{r} +write_to_cache(r, path(target_store, "out3")) +read_from_cache(path(target_store, "out3")) +``` + +## Benchmarking + +I'll compare the two (working) methods that produce a single file in the targets store vs the standard `terra` methods for write/read. + +```{r} +benchmark_write <- microbenchmark( + writeRaster = writeRaster(r_highres, + path(target_store, "test1"), + filetype = "GTiff", + overwrite = TRUE), + write_to_zip = write_to_zip(r_highres, path(target_store, "test2")), + write_to_cache = write_to_cache(r_highres, path(target_store, "test3")), + times = 300 +) + + +benchmark_read <- microbenchmark( + rast = rast(path(target_store, "test1")), + read_from_zip = read_from_zip(path(target_store, "test2")), + read_from_cache = read_from_cache(path(target_store, "test3")), + times = 300 +) +``` + +```{r} +benchmark_write +benchmark_read +``` + +```{r} +autoplot(benchmark_write) +autoplot(benchmark_read) +``` + +For both the read and write functions the zip method is slowest and both the zip and cache methods are slower than the basic `terra` methods. + +## Summary + +Whichever method we go with should be *optional* because of the reduced performance. +Users should be able to continue using the current "basic" `terra` read/write methods if they don't need to keep metadata or can store it in the layer names. + +### Archive (zip) method + +pros: + +- No visible change in behavior to users—everything happens behind the scenes + +- Should be compatible with distributed workers (not tested) + +cons: + +- Adds 2-3 direct dependencies (`zip` , `withr`, and `fs` unless I can figure out how to do all the file stuff with base R) + +- Slower than the cache option, although maybe someday it'll work with `/vsizip/` + +### Cache method + +pros: + +- Simpler code + +- Fewer package dependencies added + +- Faster of the two methods + +cons: + +- Users exposed to another cache they shouldn't touch manually + +- Need to create/maintain helper functions to, for example, destroy the cache and invalidate related targets + +- Probably won't work when distributed workers don't have access to this cache From eb25c0cd5b28699fdef846befc4bfc3df55d24cf Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 28 Oct 2024 16:17:54 -0700 Subject: [PATCH 06/13] update NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 67e8a79..663e75e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ * Added the `description` argument to all `tar_*()` functions which is passed to `tar_target()`. * Requires GDAL 3.1 or greater to use "ESRI Shapefile" driver in `tar_terra_vect()` (#71, #97) * `geotargets` now requires `targets` version 1.8.0 or higher +* `tar_terra_rast()` gains a `preserve_metadata` option that when `TRUE` reads/writes targets as zip archives that include aux.json "sidecar" files sometimes written by `terra` (#58) # geotargets 0.1.0 (14 May 2024) From c94a50579c56c0ee0cd2b982465c5cb5b5660bdc Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 10:17:43 -0700 Subject: [PATCH 07/13] move notes to inst/ --- .Rbuildignore | 1 - {notes => inst/notes}/preserve_metadata_benchmarking.Qmd | 0 2 files changed, 1 deletion(-) rename {notes => inst/notes}/preserve_metadata_benchmarking.Qmd (100%) diff --git a/.Rbuildignore b/.Rbuildignore index 877205b..e8e6149 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,4 +18,3 @@ ^CITATION\.cff$ ^logo$ -^notes$ diff --git a/notes/preserve_metadata_benchmarking.Qmd b/inst/notes/preserve_metadata_benchmarking.Qmd similarity index 100% rename from notes/preserve_metadata_benchmarking.Qmd rename to inst/notes/preserve_metadata_benchmarking.Qmd From 7505e6d500d1a7bf01827b406b78f3aabb87b29a Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 10:33:59 -0700 Subject: [PATCH 08/13] update snapshots --- .../_snaps/{tar-terra.md => tar-terra-vect.md} | 15 --------------- tests/testthat/test-tar-terra-vect.R | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) rename tests/testthat/_snaps/{tar-terra.md => tar-terra-vect.md} (74%) diff --git a/tests/testthat/_snaps/tar-terra.md b/tests/testthat/_snaps/tar-terra-vect.md similarity index 74% rename from tests/testthat/_snaps/tar-terra.md rename to tests/testthat/_snaps/tar-terra-vect.md index 28044fc..ab4872c 100644 --- a/tests/testthat/_snaps/tar-terra.md +++ b/tests/testthat/_snaps/tar-terra-vect.md @@ -1,18 +1,3 @@ -# tar_terra_rast() works - - Code - x - Output - class : SpatRaster - dimensions : 90, 95, 1 (nrow, ncol, nlyr) - resolution : 0.008333333, 0.008333333 (x, y) - extent : 5.741667, 6.533333, 49.44167, 50.19167 (xmin, xmax, ymin, ymax) - coord. ref. : lon/lat WGS 84 (EPSG:4326) - source : test_terra_rast - name : elevation - min value : 141 - max value : 547 - # tar_terra_vect() works Code diff --git a/tests/testthat/test-tar-terra-vect.R b/tests/testthat/test-tar-terra-vect.R index 52b05cf..6ec8dde 100644 --- a/tests/testthat/test-tar-terra-vect.R +++ b/tests/testthat/test-tar-terra-vect.R @@ -1,3 +1,4 @@ +# test_that() targets::tar_test("tar_terra_vect() works", { targets::tar_script({ lux_area <- function(projection = "EPSG:4326") { From 977d6f77cb7d3e91518fa4ebf554489e9e11ca83 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 10:58:56 -0700 Subject: [PATCH 09/13] implement option for terra_preserve_metadata --- R/geotargets-option.R | 17 ++++++++++++++--- R/tar-terra-rast.R | 26 +++++++++++++++----------- man/geotargets-options.Rd | 11 ++++++++++- man/tar_terra_rast.Rd | 15 ++++++++------- tests/testthat/test-tar-terra-rast.R | 3 ++- 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/R/geotargets-option.R b/R/geotargets-option.R index b02635c..b5173eb 100644 --- a/R/geotargets-option.R +++ b/R/geotargets-option.R @@ -21,6 +21,13 @@ #' a unique set of creation options. For example, with the default `"GeoJSON"` #' driver: #' +#' @param terra_preserve_metadata character. When `"drop"` (default), any +#' auxiliary files that would be written by [terra::writeRaster()] containing +#' raster metadata such as units and datetimes are lost (note that this does +#' not include layer names set with `names() <-`). When `"zip"`, these +#' metadata are retained by archiving all written files as a zip file upon +#' writing and unzipping them upon reading. This adds extra overhead and will +#' slow pipelines. #' #' @details #' These options can also be set using `options()`. For example, @@ -56,7 +63,8 @@ geotargets_option_set <- function( gdal_raster_driver = NULL, gdal_raster_creation_options = NULL, gdal_vector_driver = NULL, - gdal_vector_creation_options = NULL + gdal_vector_creation_options = NULL, + terra_preserve_metadata = NULL ) { # TODO do this programmatically with formals() or something? `options()` also accepts a named list options( @@ -67,7 +75,9 @@ geotargets_option_set <- function( "geotargets.gdal.vector.driver" = gdal_vector_driver %||% geotargets_option_get("gdal.vector.driver"), "geotargets.gdal.vector.creation.options" = gdal_vector_creation_options %||% - geotargets_option_get("gdal.vector.creation.options") + geotargets_option_get("gdal.vector.creation.options"), + "geotargets.terra.preserve.metadata" = terra_preserve_metadata %||% + geotargets_option_get("terra.preserve.metadata") ) } @@ -87,7 +97,8 @@ geotargets_option_get <- function(name) { "geotargets.gdal.raster.driver", "geotargets.gdal.raster.creation.options", "geotargets.gdal.vector.driver", - "geotargets.gdal.vector.creation.options" + "geotargets.gdal.vector.creation.options", + "geotargets.terra.preserve.metadata" )) env_name <- gsub("\\.", "_", toupper(option_name)) diff --git a/R/tar-terra-rast.R b/R/tar-terra-rast.R index 265c87e..cc664ce 100644 --- a/R/tar-terra-rast.R +++ b/R/tar-terra-rast.R @@ -6,12 +6,13 @@ #' to [terra::writeRaster()] #' @param gdal character. GDAL driver specific datasource creation options #' passed to [terra::writeRaster()] -#' @param preserve_metadata logical. When `FALSE` (default), any auxiliary files -#' that would be written by [terra::writeRaster()] containing raster metadata -#' such as units and datetimes are lost (note that this does not include layer -#' names set with `names() <-`). When `TRUE`, these metadata are retained by -#' archiving all written files as a zip file upon writing and unzipping them -#' upon reading. This adds extra overhead and will slow pipelines. +#' @param preserve_metadata character. When `"drop"` (default), any auxiliary +#' files that would be written by [terra::writeRaster()] containing raster +#' metadata such as units and datetimes are lost (note that this does not +#' include layer names set with `names() <-`). When `"zip"`, these metadata +#' are retained by archiving all written files as a zip file upon writing and +#' unzipping them upon reading. This adds extra overhead and will slow +#' pipelines. #' @param ... Additional arguments not yet used #' #' @inheritParams targets::tar_target @@ -44,7 +45,7 @@ tar_terra_rast <- function(name, pattern = NULL, filetype = geotargets_option_get("gdal.raster.driver"), gdal = geotargets_option_get("gdal.raster.creation.options"), - preserve_metadata = FALSE, #TODO: add a geotargets option for this + preserve_metadata = geotargets_option_get("terra.preserve.metadata"), ..., tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), @@ -68,6 +69,9 @@ tar_terra_rast <- function(name, drv <- get_gdal_available_driver_list("raster") filetype <- rlang::arg_match0(filetype, drv$name) + #currently only "drop" and "zip" are valid options + preserve_metadata <- preserve_metadata %||% "drop" + preserve_metadata <- rlang::arg_match0(preserve_metadata, c("drop", "zip")) #ensure that user-passed `resources` doesn't include `custom_format` if ("custom_format" %in% names(resources)) { @@ -119,19 +123,19 @@ tar_terra_rast <- function(name, } tar_rast_read <- function(preserve_metadata) { - if (isTRUE(preserve_metadata)) { + if (preserve_metadata == "zip") { function(path) { tmp <- withr::local_tempdir() zip::unzip(zipfile = path, exdir = tmp) terra::rast(file.path(tmp, basename(path))) } - } else { + } else if (preserve_metadata == "drop") { function(path) terra::rast(path) } } tar_rast_write <- function(filetype, gdal, preserve_metadata) { - if (isTRUE(preserve_metadata)) { + if (preserve_metadata == "zip") { function(object, path) { #write the raster in a fresh local tempdir() that disappears when function is done tmp <- withr::local_tempdir() @@ -155,7 +159,7 @@ tar_rast_write <- function(filetype, gdal, preserve_metadata) { #move the zip file to the expected place file.rename(file.path(tmp, basename(path)), path) } - } else { + } else if (preserve_metadata == "drop") { function(object, path) { terra::writeRaster( object, diff --git a/man/geotargets-options.Rd b/man/geotargets-options.Rd index 84639cd..7727dbc 100644 --- a/man/geotargets-options.Rd +++ b/man/geotargets-options.Rd @@ -9,7 +9,8 @@ geotargets_option_set( gdal_raster_driver = NULL, gdal_raster_creation_options = NULL, gdal_vector_driver = NULL, - gdal_vector_creation_options = NULL + gdal_vector_creation_options = NULL, + terra_preserve_metadata = NULL ) geotargets_option_get(name) @@ -37,6 +38,14 @@ a unique set of creation options. For example, with the default \code{"GeoJSON"} driver: \url{https://gdal.org/drivers/vector/geojson.html#layer-creation-options}} +\item{terra_preserve_metadata}{character. When \code{"drop"} (default), any +auxiliary files that would be written by \code{\link[terra:writeRaster]{terra::writeRaster()}} containing +raster metadata such as units and datetimes are lost (note that this does +not include layer names set with \verb{names() <-}). When \code{"zip"}, these +metadata are retained by archiving all written files as a zip file upon +writing and unzipping them upon reading. This adds extra overhead and will +slow pipelines.} + \item{name}{character; option name to get.} } \value{ diff --git a/man/tar_terra_rast.Rd b/man/tar_terra_rast.Rd index 1cdf271..1e6b93d 100644 --- a/man/tar_terra_rast.Rd +++ b/man/tar_terra_rast.Rd @@ -10,7 +10,7 @@ tar_terra_rast( pattern = NULL, filetype = geotargets_option_get("gdal.raster.driver"), gdal = geotargets_option_get("gdal.raster.creation.options"), - preserve_metadata = FALSE, + preserve_metadata = geotargets_option_get("terra.preserve.metadata"), ..., tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), @@ -74,12 +74,13 @@ to \code{\link[terra:writeRaster]{terra::writeRaster()}}} \item{gdal}{character. GDAL driver specific datasource creation options passed to \code{\link[terra:writeRaster]{terra::writeRaster()}}} -\item{preserve_metadata}{logical. When \code{FALSE} (default), any auxiliary files -that would be written by \code{\link[terra:writeRaster]{terra::writeRaster()}} containing raster metadata -such as units and datetimes are lost (note that this does not include layer -names set with \verb{names() <-}). When \code{TRUE}, these metadata are retained by -archiving all written files as a zip file upon writing and unzipping them -upon reading. This adds extra overhead and will slow pipelines.} +\item{preserve_metadata}{character. When \code{"drop"} (default), any auxiliary +files that would be written by \code{\link[terra:writeRaster]{terra::writeRaster()}} containing raster +metadata such as units and datetimes are lost (note that this does not +include layer names set with \verb{names() <-}). When \code{"zip"}, these metadata +are retained by archiving all written files as a zip file upon writing and +unzipping them upon reading. This adds extra overhead and will slow +pipelines.} \item{...}{Additional arguments not yet used} diff --git a/tests/testthat/test-tar-terra-rast.R b/tests/testthat/test-tar-terra-rast.R index 85590a6..1b73458 100644 --- a/tests/testthat/test-tar-terra-rast.R +++ b/tests/testthat/test-tar-terra-rast.R @@ -132,6 +132,7 @@ tar_test("metadata is maintained", { library(targets) library(geotargets) library(terra) + geotargets_option_set(terra_preserve_metadata = "zip") make_rast <- function() { f <- system.file("ex/elev.tif", package="terra") r <- terra::rast(f) @@ -141,7 +142,7 @@ tar_test("metadata is maintained", { r } list( - tar_terra_rast(r, make_rast(), preserve_metadata = TRUE) + tar_terra_rast(r, make_rast()) ) }) tar_make() From 3c01a8b25dee99cd29241ad77cc37ddc975787de Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 11:09:45 -0700 Subject: [PATCH 10/13] move terra, wither, and zip to Imports --- DESCRIPTION | 10 +++++----- R/tar-terra-rast.R | 1 - R/tar-terra-sprc.R | 2 -- R/tar-terra-vect.R | 2 -- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bc9c62d..7f0751d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,16 +34,16 @@ RoxygenNote: 7.3.2 Imports: targets (>= 1.8.0), rlang (>= 1.1.3), - cli (>= 3.6.2) + cli (>= 3.6.2), + terra (>= 1.7.71), + withr (>= 3.0.0), + zip Suggests: crew (>= 0.9.2), ncmeta, sf, stars, - terra (>= 1.7.71), - testthat (>= 3.0.0), - withr (>= 3.0.0), - zip + testthat (>= 3.0.0) Config/testthat/edition: 3 URL: https://github.com/njtierney/geotargets, https://njtierney.github.io/geotargets/ BugReports: https://github.com/njtierney/geotargets/issues diff --git a/R/tar-terra-rast.R b/R/tar-terra-rast.R index cc664ce..29ee599 100644 --- a/R/tar-terra-rast.R +++ b/R/tar-terra-rast.R @@ -61,7 +61,6 @@ tar_terra_rast <- function(name, retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description")) { - check_pkg_installed("terra") filetype <- filetype %||% "GTiff" diff --git a/R/tar-terra-sprc.R b/R/tar-terra-sprc.R index 576e818..943c6d3 100644 --- a/R/tar-terra-sprc.R +++ b/R/tar-terra-sprc.R @@ -66,7 +66,6 @@ tar_terra_sprc <- function(name, retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description")) { - check_pkg_installed("terra") #ensure that user-passed `resources` doesn't include `custom_format` if ("custom_format" %in% names(resources)) { cli::cli_abort("{.val custom_format} cannot be supplied to targets created with {.fn tar_terra_sprc}") @@ -184,7 +183,6 @@ tar_terra_sds <- function(name, retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description")) { - check_pkg_installed("terra") #ensure that user-passed `resources` doesn't include `custom_format` if ("custom_format" %in% names(resources)) { cli::cli_abort("{.val custom_format} cannot be supplied to targets created with {.fn tar_terra_sprc}") diff --git a/R/tar-terra-vect.R b/R/tar-terra-vect.R index 34545ab..8d8659d 100644 --- a/R/tar-terra-vect.R +++ b/R/tar-terra-vect.R @@ -62,8 +62,6 @@ tar_terra_vect <- function(name, retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description")) { - check_pkg_installed("terra") - filetype <- filetype %||% "GeoJSON" gdal <- gdal %||% "ENCODING=UTF-8" From 6ea95b1edd80d43549ccfc18fb97d88989fba3f2 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 11:11:18 -0700 Subject: [PATCH 11/13] update NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 663e75e..b1a452e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ * Requires GDAL 3.1 or greater to use "ESRI Shapefile" driver in `tar_terra_vect()` (#71, #97) * `geotargets` now requires `targets` version 1.8.0 or higher * `tar_terra_rast()` gains a `preserve_metadata` option that when `TRUE` reads/writes targets as zip archives that include aux.json "sidecar" files sometimes written by `terra` (#58) +* `terra` (>= 1.7.71), `withr` (>= 3.0.0), and `zip` are now required dependencies of `geotargets` (moved from `Suggests` to `Imports`) # geotargets 0.1.0 (14 May 2024) From 84964695dd70856b20336f03af6a7feee6c8dd49 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 29 Oct 2024 11:13:20 -0700 Subject: [PATCH 12/13] update arg description in NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b1a452e..a907f04 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,7 @@ * Added the `description` argument to all `tar_*()` functions which is passed to `tar_target()`. * Requires GDAL 3.1 or greater to use "ESRI Shapefile" driver in `tar_terra_vect()` (#71, #97) * `geotargets` now requires `targets` version 1.8.0 or higher -* `tar_terra_rast()` gains a `preserve_metadata` option that when `TRUE` reads/writes targets as zip archives that include aux.json "sidecar" files sometimes written by `terra` (#58) +* `tar_terra_rast()` gains a `preserve_metadata` option that when set to `"zip"` reads/writes targets as zip archives that include aux.json "sidecar" files sometimes written by `terra` (#58) * `terra` (>= 1.7.71), `withr` (>= 3.0.0), and `zip` are now required dependencies of `geotargets` (moved from `Suggests` to `Imports`) # geotargets 0.1.0 (14 May 2024) From 83616a2f46b63bd03c6f3421229abec6eb4ae89a Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Wed, 30 Oct 2024 09:36:02 -0700 Subject: [PATCH 13/13] use switch() instead of if else if --- R/tar-terra-rast.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/R/tar-terra-rast.R b/R/tar-terra-rast.R index 29ee599..28f72c9 100644 --- a/R/tar-terra-rast.R +++ b/R/tar-terra-rast.R @@ -122,20 +122,21 @@ tar_terra_rast <- function(name, } tar_rast_read <- function(preserve_metadata) { - if (preserve_metadata == "zip") { - function(path) { + switch( + preserve_metadata, + zip = function(path) { tmp <- withr::local_tempdir() zip::unzip(zipfile = path, exdir = tmp) terra::rast(file.path(tmp, basename(path))) - } - } else if (preserve_metadata == "drop") { - function(path) terra::rast(path) - } + }, + drop = function(path) terra::rast(path) + ) } tar_rast_write <- function(filetype, gdal, preserve_metadata) { - if (preserve_metadata == "zip") { - function(object, path) { + switch( + preserve_metadata, + zip = function(object, path) { #write the raster in a fresh local tempdir() that disappears when function is done tmp <- withr::local_tempdir() dir.create(file.path(tmp, dirname(path)), recursive = TRUE) @@ -157,9 +158,8 @@ tar_rast_write <- function(filetype, gdal, preserve_metadata) { ) #move the zip file to the expected place file.rename(file.path(tmp, basename(path)), path) - } - } else if (preserve_metadata == "drop") { - function(object, path) { + }, + drop = function(object, path) { terra::writeRaster( object, path, @@ -168,5 +168,5 @@ tar_rast_write <- function(filetype, gdal, preserve_metadata) { gdal = gdal ) } - } + ) }