From 74bef918eb74fd8e4f498b3819f6cce9d6343e8a Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@ruhr-uni-bochum.de>
Date: Wed, 29 Jan 2025 13:28:48 +0100
Subject: [PATCH 01/11] [save] enable PUGIXML_COMPACT

---
 inst/include/pugixml/pugiconfig.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/include/pugixml/pugiconfig.hpp b/inst/include/pugixml/pugiconfig.hpp
index c7c9af18e..0101acea4 100644
--- a/inst/include/pugixml/pugiconfig.hpp
+++ b/inst/include/pugixml/pugiconfig.hpp
@@ -18,7 +18,7 @@
 // #define PUGIXML_WCHAR_MODE
 
 // Uncomment this to enable compact mode
-// #define PUGIXML_COMPACT
+#define PUGIXML_COMPACT
 
 // Uncomment this to disable XPath
 // #define PUGIXML_NO_XPATH

From 745c6380fddb46f15af1a197c038d0a3ce159b2d Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@ruhr-uni-bochum.de>
Date: Wed, 29 Jan 2025 19:41:30 +0100
Subject: [PATCH 02/11] [save] avoid memory blow up

---
 R/class-workbook.R  | 31 +++++++++++++++++--------------
 src/RcppExports.cpp |  4 ++--
 src/write_file.cpp  |  2 +-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/R/class-workbook.R b/R/class-workbook.R
index 91da0d6b3..e5e68b0bb 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -9778,40 +9778,43 @@ wbWorkbook <- R6::R6Class(
           }
         } else {
           ## Write worksheets
-          ws <- self$worksheets[[i]]
-          hasHL <- length(ws$hyperlinks) > 0
+          # ws <- self$worksheets[[i]]
+          hasHL <- length(self$worksheets[[i]]$hyperlinks) > 0
 
-          prior <- ws$get_prior_sheet_data()
-          post <- ws$get_post_sheet_data()
+          prior <- self$worksheets[[i]]$get_prior_sheet_data()
+          post <-  self$worksheets[[i]]$get_post_sheet_data()
 
-          if (!is.null(ws$sheet_data$cc)) {
+          if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
 
-            cc <- ws$sheet_data$cc
-            cc$r <- stringi::stri_join(cc$c_r, cc$row_r)
+            self$worksheets[[i]]$sheet_data$cc$r <- with(
+              self$worksheets[[i]]$sheet_data$cc,
+              stringi::stri_join(c_r, row_r)
+            )
+            cc <- self$worksheets[[i]]$sheet_data$cc
             # prepare data for output
 
             # there can be files, where row_attr is incomplete because a row
             # is lacking any attributes (presumably was added before saving)
             # still row_attr is what we want!
 
-            rows_attr <- ws$sheet_data$row_attr
-            ws$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
+            rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
+            self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
 
-            cc_rows <- ws$sheet_data$row_attr$r
+            cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
             # c("row_r", "c_r",  "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
             cc <- cc[cc$row_r %in% cc_rows, ]
 
-            ws$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
+            self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
           } else {
-            ws$sheet_data$row_attr <- NULL
-            ws$sheet_data$cc <- NULL
+            self$worksheets[[i]]$sheet_data$row_attr <- NULL
+            self$worksheets[[i]]$sheet_data$cc <- NULL
           }
 
           # create entire sheet prior to writing it
           sheet_xml <- write_worksheet(
             prior = prior,
             post = post,
-            sheet_data = ws$sheet_data
+            sheet_data = self$worksheets[[i]]$sheet_data
           )
           ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
           write_xmlPtr(doc = sheet_xml, fl = ws_file)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 87e042c7f..afab64d3f 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -854,14 +854,14 @@ BEGIN_RCPP
 END_RCPP
 }
 // write_worksheet
-XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data);
+XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment& sheet_data);
 RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
     Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
-    Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
+    Rcpp::traits::input_parameter< Rcpp::Environment& >::type sheet_data(sheet_dataSEXP);
     rcpp_result_gen = Rcpp::wrap(write_worksheet(prior, post, sheet_data));
     return rcpp_result_gen;
 END_RCPP
diff --git a/src/write_file.cpp b/src/write_file.cpp
index 6cb2dcb90..7509307b3 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -202,7 +202,7 @@ void xml_sheet_data(pugi::xml_node &doc, Rcpp::DataFrame &row_attr, Rcpp::DataFr
 // create single xml rows of sheet_data.
 //
 // [[Rcpp::export]]
-XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data) {
+XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment &sheet_data) {
   uint32_t pugi_parse_flags = pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_ws_pcdata | pugi::parse_eol;
 
   // sheet_data will be in order, just need to check for row_heights

From 34e66d3c735393c79f51b8c16c1511c30d6920a3 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@ruhr-uni-bochum.de>
Date: Sat, 20 Jul 2024 19:24:42 +0200
Subject: [PATCH 03/11] [writing] provide openxlsx2.export_with_pugi = FALSE
 option

---
 R/RcppExports.R             |   4 +
 R/class-workbook.R          |  77 +++++++++------
 src/RcppExports.cpp         |  14 +++
 src/write_file.cpp          | 186 ++++++++++++++++++++++++++++++++++++
 tests/testthat/test-write.R |  22 +++++
 5 files changed, 276 insertions(+), 27 deletions(-)

diff --git a/R/RcppExports.R b/R/RcppExports.R
index 77566d09a..43a519d5c 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -340,6 +340,10 @@ set_sst <- function(sharedStrings) {
     .Call(`_openxlsx2_set_sst`, sharedStrings)
 }
 
+write_worksheet_slim <- function(sheet_data, prior, post, fl) {
+    invisible(.Call(`_openxlsx2_write_worksheet_slim`, sheet_data, prior, post, fl))
+}
+
 write_worksheet <- function(prior, post, sheet_data) {
     .Call(`_openxlsx2_write_worksheet`, prior, post, sheet_data)
 }
diff --git a/R/class-workbook.R b/R/class-workbook.R
index e5e68b0bb..2cc17a210 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -9784,41 +9784,64 @@ wbWorkbook <- R6::R6Class(
           prior <- self$worksheets[[i]]$get_prior_sheet_data()
           post <-  self$worksheets[[i]]$get_post_sheet_data()
 
-          if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
+          use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)
 
-            self$worksheets[[i]]$sheet_data$cc$r <- with(
-              self$worksheets[[i]]$sheet_data$cc,
-              stringi::stri_join(c_r, row_r)
-            )
-            cc <- self$worksheets[[i]]$sheet_data$cc
-            # prepare data for output
+          if (use_pugixml_export) {
+            # failsaves. check that all rows and cells
+            # are available and in the correct order
+            if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
+
+              self$worksheets[[i]]$sheet_data$cc$r <- with(
+                self$worksheets[[i]]$sheet_data$cc,
+                stringi::stri_join(c_r, row_r)
+              )
+              cc <- self$worksheets[[i]]$sheet_data$cc
+              # prepare data for output
 
-            # there can be files, where row_attr is incomplete because a row
-            # is lacking any attributes (presumably was added before saving)
-            # still row_attr is what we want!
+              # there can be files, where row_attr is incomplete because a row
+              # is lacking any attributes (presumably was added before saving)
+              # still row_attr is what we want!
 
-            rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
-            self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
+              rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
+              self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
 
-            cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
-            # c("row_r", "c_r",  "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
-            cc <- cc[cc$row_r %in% cc_rows, ]
+              cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
+              # c("row_r", "c_r",  "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
+              cc <- cc[cc$row_r %in% cc_rows, ]
 
-            self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
-          } else {
-            self$worksheets[[i]]$sheet_data$row_attr <- NULL
-            self$worksheets[[i]]$sheet_data$cc <- NULL
+              self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
+              rm(cc)
+            } else {
+              self$worksheets[[i]]$sheet_data$row_attr <- NULL
+              self$worksheets[[i]]$sheet_data$cc <- NULL
+            }
           }
 
-          # create entire sheet prior to writing it
-          sheet_xml <- write_worksheet(
-            prior = prior,
-            post = post,
-            sheet_data = self$worksheets[[i]]$sheet_data
-          )
           ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
-          write_xmlPtr(doc = sheet_xml, fl = ws_file)
-          rm(sheet_xml)
+
+          if (use_pugixml_export) {
+
+            # create entire sheet prior to writing it
+            sheet_xml <- write_worksheet(
+              prior      = prior,
+              post       = post,
+              sheet_data = self$worksheets[[i]]$sheet_data
+            )
+            write_xmlPtr(doc = sheet_xml, fl = ws_file)
+
+          } else {
+
+            if (grepl("</worksheet>", prior))
+              prior <- substr(prior, 1, nchar(prior) - 13) # remove " </worksheet>"
+
+            write_worksheet_slim(
+              sheet_data = self$worksheets[[i]]$sheet_data,
+              prior      = prior,
+              post       = post,
+              fl         = ws_file
+            )
+
+          }
 
           ## write worksheet rels
           if (length(self$worksheets_rels[[i]])) {
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index afab64d3f..eee07da48 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -853,6 +853,19 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// write_worksheet_slim
+void write_worksheet_slim(Rcpp::Environment sheet_data, std::string prior, std::string post, std::string fl);
+RcppExport SEXP _openxlsx2_write_worksheet_slim(SEXP sheet_dataSEXP, SEXP priorSEXP, SEXP postSEXP, SEXP flSEXP) {
+BEGIN_RCPP
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
+    Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
+    Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
+    Rcpp::traits::input_parameter< std::string >::type fl(flSEXP);
+    write_worksheet_slim(sheet_data, prior, post, fl);
+    return R_NilValue;
+END_RCPP
+}
 // write_worksheet
 XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment& sheet_data);
 RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
@@ -1039,6 +1052,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_openxlsx2_read_colors", (DL_FUNC) &_openxlsx2_read_colors, 1},
     {"_openxlsx2_write_colors", (DL_FUNC) &_openxlsx2_write_colors, 1},
     {"_openxlsx2_set_sst", (DL_FUNC) &_openxlsx2_set_sst, 1},
+    {"_openxlsx2_write_worksheet_slim", (DL_FUNC) &_openxlsx2_write_worksheet_slim, 4},
     {"_openxlsx2_write_worksheet", (DL_FUNC) &_openxlsx2_write_worksheet, 3},
     {"_openxlsx2_write_xmlPtr", (DL_FUNC) &_openxlsx2_write_xmlPtr, 2},
     {"_openxlsx2_styles_bin", (DL_FUNC) &_openxlsx2_styles_bin, 3},
diff --git a/src/write_file.cpp b/src/write_file.cpp
index 7509307b3..2f81d83ce 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -20,6 +20,192 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
   return sst;
 }
 
+// write xml by streaming to files. this takes whatever input we provide and
+// dumps it into the file. no xml checking, no unicode checking
+void xml_sheet_data_slim(
+    Rcpp::DataFrame row_attr,
+    Rcpp::DataFrame cc,
+    std::string prior,
+    std::string post,
+    std::string fl
+) {
+
+  bool has_cm = cc.containsElementNamed("c_cm");
+  bool has_ph = cc.containsElementNamed("c_ph");
+  bool has_vm = cc.containsElementNamed("c_vm");
+
+  std::ofstream file(fl);
+
+  auto lastrow = 0; // integer value of the last row with column data
+  auto thisrow = 0; // integer value of the current row with column data
+  auto row_idx = 0; // the index of the row_attr file. this is != rowid
+  auto rowid   = 0; // integer value of the r field in row_attr
+
+  std::string xml_preserver = " ";
+
+  file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+  file << prior;
+
+  Rcpp::CharacterVector cc_c_cm, cc_c_ph, cc_c_vm;
+
+  if (cc.nrow() && cc.ncol()) {
+    // we cannot access rows directly in the dataframe.
+    // Have to extract the columns and use these
+    Rcpp::CharacterVector cc_row_r  = cc["row_r"];  // 1
+    Rcpp::CharacterVector cc_r      = cc["r"];      // A1
+    Rcpp::CharacterVector cc_v      = cc["v"];
+    Rcpp::CharacterVector cc_c_t    = cc["c_t"];
+    Rcpp::CharacterVector cc_c_s    = cc["c_s"];
+    if (has_cm) cc_c_cm  = cc["c_cm"];
+    if (has_ph) cc_c_ph  = cc["c_ph"];
+    if (has_vm) cc_c_vm  = cc["c_vm"];
+    Rcpp::CharacterVector cc_f      = cc["f"];
+    Rcpp::CharacterVector cc_f_attr = cc["f_attr"];
+    Rcpp::CharacterVector cc_is     = cc["is"];
+
+    Rcpp::CharacterVector row_r     = row_attr["r"];
+
+
+    file << "<sheetData>";
+    for (auto i = 0; i < cc.nrow(); ++i) {
+
+      thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));
+
+      if (lastrow < thisrow) {
+
+        // there might be entirely empty rows in between. this is the case for
+        // loadExample. We check the rowid and write the line and skip until we
+        // have every row and only then continue writing the column
+        while (rowid < thisrow) {
+
+          rowid = std::stoi(Rcpp::as<std::string>(
+            row_r[row_idx]
+          ));
+
+          if (row_idx) file << "</row>";
+          file << "<row";
+          Rcpp::CharacterVector attrnams = row_attr.names();
+
+          for (auto j = 0; j < row_attr.ncol(); ++j) {
+
+            Rcpp::CharacterVector cv_s = "";
+            cv_s = Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx];
+
+            if (cv_s[0] != "") {
+              const std::string val_strl = Rcpp::as<std::string>(cv_s);
+              file << " " << attrnams[j] << "=\"" << val_strl.c_str() << "\"";
+            }
+          }
+          file << ">"; // end <r ...>
+
+          // read the next row_idx when visiting again
+          ++row_idx;
+        }
+      }
+
+      // create node <c>
+      file << "<c";
+
+      // Every cell consists of a typ and a val list. Certain functions have an
+      // additional attr list.
+
+      // append attributes <c r="A1" ...>
+      file << " r" << "=\"" << to_string(cc_r[i]).c_str() << "\"";
+
+      if (!to_string(cc_c_s[i]).empty())
+        file << " s" << "=\"" << to_string(cc_c_s[i]).c_str() << "\"";
+
+      // assign type if not <v> aka numeric
+      if (!to_string(cc_c_t[i]).empty())
+        file << " t" << "=\"" << to_string(cc_c_t[i]).c_str() << "\"";
+
+      // CellMetaIndex: suppress curly brackets in spreadsheet software
+      if (has_cm && !to_string(cc_c_cm[i]).empty())
+        file << " cm" << "=\"" << to_string(cc_c_cm[i]).c_str() << "\"";
+
+      // phonetics spelling
+      if (has_ph && !to_string(cc_c_ph[i]).empty())
+        file << " ph" << "=\"" << to_string(cc_c_ph[i]).c_str() << "\"";
+
+      // suppress curly brackets in spreadsheet software
+      if (has_vm && !to_string(cc_c_vm[i]).empty())
+        file << " vm" << "=\"" << to_string(cc_c_vm[i]).c_str() << "\"";
+
+      file << ">"; // end <c ...>
+
+      bool f_si = false;
+
+      // <f> ... </f>
+      // f node: formula to be evaluated
+      if (!to_string(cc_f[i]).empty() || !to_string(cc_f_attr[i]).empty()) {
+        file << "<f";
+        if (!to_string(cc_f_attr[i]).empty()) {
+          file << to_string(cc_f_attr[i]).c_str();
+        }
+        file << ">";
+
+        file << to_string(cc_f[i]).c_str();
+
+        file << "</f>";
+      }
+
+      // v node: value stored from evaluated formula
+      if (!to_string(cc_v[i]).empty()) {
+        if (!f_si & (to_string(cc_v[i]).compare(xml_preserver.c_str()) == 0)) {
+          // this looks strange
+          file << "<v xml:space=\"preserve\">";
+          file << " ";
+          file << "</v>";
+        } else {
+          file << "<v>" << to_string(cc_v[i]).c_str() << "</v>";
+        }
+      }
+
+      // <is><t> ... </t></is>
+      if (to_string(cc_c_t[i]).compare("inlineStr") == 0) {
+        if (!to_string(cc_is[i]).empty()) {
+          file << to_string(cc_is[i]).c_str();
+        }
+      }
+
+      file << "</c>";
+
+      // update lastrow
+      lastrow = thisrow;
+    }
+
+    file << "</row>";
+    file << "</sheetData>";
+  } else {
+    file << "<sheetData/>";
+  }
+
+
+  file << post;
+  file << "</worksheet>";
+
+  file.close();
+
+}
+
+// export worksheet without pugixml
+// this should be way quicker, uses far less memory, but also skips all of the checks pugi does
+//
+// [[Rcpp::export]]
+void write_worksheet_slim(
+    Rcpp::Environment sheet_data,
+    std::string prior,
+    std::string post,
+    std::string fl
+){
+  // sheet_data will be in order, just need to check for row_heights
+  // CharacterVector cell_col = int_to_col(sheet_data.field("cols"));
+  Rcpp::DataFrame row_attr = Rcpp::as<Rcpp::DataFrame>(sheet_data["row_attr"]);
+  Rcpp::DataFrame cc = Rcpp::as<Rcpp::DataFrame>(sheet_data["cc"]);
+
+  xml_sheet_data_slim(row_attr, cc, prior, post, fl);
+}
+
 // creates an xml row
 // data in xml is ordered row wise. therefore we need the row attributes and
 // the column data used in this row. This function uses both to create a single
diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R
index e57d13d4a..8c94cc34e 100644
--- a/tests/testthat/test-write.R
+++ b/tests/testthat/test-write.R
@@ -1470,5 +1470,27 @@ test_that("guarding against overwriting shared formula reference works", {
   exp <- c("1", "2", "B1 + 1", "C1 + 1")
   got <- unname(unlist(wb$to_df(show_formula = TRUE, col_names = FALSE)))
   expect_equal(exp, got)
+})
+
+test_that("writing without pugixml works", {
+
+  temp <- temp_xlsx()
+  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb <- wb_load(temp))
+
+  temp <- temp_xlsx()
+  options("openxlsx2.export_with_pugi" = FALSE)
+  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb <- wb_load(temp))
+
+  temp <- temp_xlsx()
+  options("openxlsx2.export_with_pugi" = TRUE)
+  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb <- wb_load(temp))
+
+  temp <- temp_xlsx()
+  options("openxlsx2.export_with_pugi" = NULL)
+  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb <- wb_load(temp))
 
 })

From bb91cbcdd7ad79c56620bd570aff49dd27a7d9d8 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:11:43 +0100
Subject: [PATCH 04/11] [save] provide `flush` argument

---
 R/class-workbook-wrappers.R | 18 ++++++++++++++++--
 R/class-workbook.R          | 11 ++++++-----
 R/write_xlsx.R              | 11 +++++++++--
 man/wbWorkbook.Rd           |  4 +++-
 man/wb_save.Rd              | 18 +++++++++++++++++-
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/R/class-workbook-wrappers.R b/R/class-workbook-wrappers.R
index fe3272470..9f3bbc7ec 100644
--- a/R/class-workbook-wrappers.R
+++ b/R/class-workbook-wrappers.R
@@ -67,10 +67,24 @@ wb_workbook <- function(
 
 #' Save a workbook to file
 #'
+#' @details When saving a `wbWorkbook` to a file, memory usage may spike
+#' depending on the worksheet size. This happens because the entire XML
+#' structure is created in memory before writing to disk. The memory
+#' required depends on worksheet size, as XML files consist of character
+#' data and include additional overhead for validity checks.
+#'
+#' The `flush` argument streams worksheet XML data directly to disk,
+#' avoiding the need to build the full XML tree in memory. This reduces
+#' memory usage but skips some XML validity checks. It also bypasses
+#' the `pugixml` functions that `openxlsx2` uses, omitting certain
+#' preliminary sanity checks before writing. As the name suggests,
+#' the output is simply flushed to disk.
+#'
 #' @param wb A `wbWorkbook` object to write to file
 #' @param file A path to save the workbook to
 #' @param overwrite If `FALSE`, will not overwrite when `file` already exists.
 #' @param path Deprecated argument. Please use `file` in new code.
+#' @param flush Experimental, streams the worksheet file to disk
 #'
 #' @export
 #' @family workbook wrappers
@@ -86,9 +100,9 @@ wb_workbook <- function(
 #' \donttest{
 #' wb_save(wb, file = temp_xlsx(), overwrite = TRUE)
 #' }
-wb_save <- function(wb, file = NULL, overwrite = TRUE, path = NULL) {
+wb_save <- function(wb, file = NULL, overwrite = TRUE, path = NULL, flush = FALSE) {
   assert_workbook(wb)
-  wb$clone()$save(file = file, overwrite = overwrite, path = path)
+  wb$clone()$save(file = file, overwrite = overwrite, path = path, flush = flush)
 }
 
 # add data ----------------------------------------------------------------
diff --git a/R/class-workbook.R b/R/class-workbook.R
index 2cc17a210..3022df014 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -2910,8 +2910,9 @@ wbWorkbook <- R6::R6Class(
     #' @param file The path to save the workbook to
     #' @param overwrite If `FALSE`, will not overwrite when `path` exists
     #' @param path Deprecated argument previously used for file. Please use file in new code.
+    #' @param flush Experimental, streams the worksheet file to disk
     #' @return The `wbWorkbook` object invisibly
-    save = function(file = self$path, overwrite = TRUE, path = NULL) {
+    save = function(file = self$path, overwrite = TRUE, path = NULL, flush = FALSE) {
 
       if (!is.null(path)) {
         .Deprecated(old = "wb_save(path)", new = "wb_save(file)", package = "openxlsx2")
@@ -3435,7 +3436,8 @@ wbWorkbook <- R6::R6Class(
         xlchartsDir,
         xlchartsRelsDir,
         xlworksheetsDir,
-        xlworksheetsRelsDir
+        xlworksheetsRelsDir,
+        use_pugixml_export = isFALSE(flush)
       )
 
       ## write sharedStrings.xml
@@ -9643,7 +9645,8 @@ wbWorkbook <- R6::R6Class(
       xlchartsDir,
       xlchartsRelsDir,
       xlworksheetsDir,
-      xlworksheetsRelsDir
+      xlworksheetsRelsDir,
+      use_pugixml_export
     ) {
 
       ## write charts
@@ -9784,8 +9787,6 @@ wbWorkbook <- R6::R6Class(
           prior <- self$worksheets[[i]]$get_prior_sheet_data()
           post <-  self$worksheets[[i]]$get_post_sheet_data()
 
-          use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)
-
           if (use_pugixml_export) {
             # failsaves. check that all rows and cells
             # are available and in the correct order
diff --git a/R/write_xlsx.R b/R/write_xlsx.R
index 05b3ca5b3..3fac2ea44 100644
--- a/R/write_xlsx.R
+++ b/R/write_xlsx.R
@@ -58,7 +58,8 @@ write_xlsx <- function(x, file, as_table = FALSE, ...) {
     "table_name", "with_filter", "first_active_row", "first_active_col",
     "first_row", "first_col", "col_widths", "na.strings",
     "overwrite", "title", "subject", "category",
-    "font_size", "font_color", "font_name"
+    "font_size", "font_color", "font_name",
+    "flush"
   )
 
   params <- list(...)
@@ -296,6 +297,12 @@ write_xlsx <- function(x, file, as_table = FALSE, ...) {
     font_args$font_name <- params$font_name
   }
 
+  # Flush stream file to disk
+  flush <- FALSE
+  if ("flush" %in% names(params)) {
+    flush <- params$flush
+  }
+
 
   ## create new Workbook object
   wb <- wb_workbook(creator = creator, title = title, subject = subject, category = category)
@@ -479,7 +486,7 @@ write_xlsx <- function(x, file, as_table = FALSE, ...) {
   }
 
   if (!missing(file))
-    wb_save(wb, file = file, overwrite = overwrite)
+    wb_save(wb, file = file, overwrite = overwrite, flush = flush)
 
   invisible(wb)
 }
diff --git a/man/wbWorkbook.Rd b/man/wbWorkbook.Rd
index 42fe1d887..d72d628de 100644
--- a/man/wbWorkbook.Rd
+++ b/man/wbWorkbook.Rd
@@ -1102,7 +1102,7 @@ The \code{wbWorkbook} object invisibly
 \subsection{Method \code{save()}}{
 Save the workbook
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{wbWorkbook$save(file = self$path, overwrite = TRUE, path = NULL)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{wbWorkbook$save(file = self$path, overwrite = TRUE, path = NULL, flush = FALSE)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -1113,6 +1113,8 @@ Save the workbook
 \item{\code{overwrite}}{If \code{FALSE}, will not overwrite when \code{path} exists}
 
 \item{\code{path}}{Deprecated argument previously used for file. Please use file in new code.}
+
+\item{\code{flush}}{Experimental, streams the worksheet file to disk}
 }
 \if{html}{\out{</div>}}
 }
diff --git a/man/wb_save.Rd b/man/wb_save.Rd
index 44beff524..dc591de23 100644
--- a/man/wb_save.Rd
+++ b/man/wb_save.Rd
@@ -4,7 +4,7 @@
 \alias{wb_save}
 \title{Save a workbook to file}
 \usage{
-wb_save(wb, file = NULL, overwrite = TRUE, path = NULL)
+wb_save(wb, file = NULL, overwrite = TRUE, path = NULL, flush = FALSE)
 }
 \arguments{
 \item{wb}{A \code{wbWorkbook} object to write to file}
@@ -14,6 +14,8 @@ wb_save(wb, file = NULL, overwrite = TRUE, path = NULL)
 \item{overwrite}{If \code{FALSE}, will not overwrite when \code{file} already exists.}
 
 \item{path}{Deprecated argument. Please use \code{file} in new code.}
+
+\item{flush}{Experimental, streams the worksheet file to disk}
 }
 \value{
 the \code{wbWorkbook} object, invisibly
@@ -21,6 +23,20 @@ the \code{wbWorkbook} object, invisibly
 \description{
 Save a workbook to file
 }
+\details{
+When saving a \code{wbWorkbook} to a file, memory usage may spike
+depending on the worksheet size. This happens because the entire XML
+structure is created in memory before writing to disk. The memory
+required depends on worksheet size, as XML files consist of character
+data and include additional overhead for validity checks.
+
+The \code{flush} argument streams worksheet XML data directly to disk,
+avoiding the need to build the full XML tree in memory. This reduces
+memory usage but skips some XML validity checks. It also bypasses
+the \code{pugixml} functions that \code{openxlsx2} uses, omitting certain
+preliminary sanity checks before writing. As the name suggests,
+the output is simply flushed to disk.
+}
 \examples{
 ## Create a new workbook and add a worksheet
 wb <- wb_workbook("Creator of workbook")

From 306a58c37f631fadae3d3e79408ba172b3231772 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:11:51 +0100
Subject: [PATCH 05/11] Revert "[save] enable PUGIXML_COMPACT"

This reverts commit 74bef918eb74fd8e4f498b3819f6cce9d6343e8a.
---
 inst/include/pugixml/pugiconfig.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/include/pugixml/pugiconfig.hpp b/inst/include/pugixml/pugiconfig.hpp
index 0101acea4..c7c9af18e 100644
--- a/inst/include/pugixml/pugiconfig.hpp
+++ b/inst/include/pugixml/pugiconfig.hpp
@@ -18,7 +18,7 @@
 // #define PUGIXML_WCHAR_MODE
 
 // Uncomment this to enable compact mode
-#define PUGIXML_COMPACT
+// #define PUGIXML_COMPACT
 
 // Uncomment this to disable XPath
 // #define PUGIXML_NO_XPATH

From 8b7f60cc37eb5172f0a7d11507b7ceda35683e66 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:14:08 +0100
Subject: [PATCH 06/11] [save] assert `flush`

---
 R/class-workbook.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/class-workbook.R b/R/class-workbook.R
index 3022df014..23f931ed4 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -2921,6 +2921,7 @@ wbWorkbook <- R6::R6Class(
 
       assert_class(file, "character")
       assert_class(overwrite, "logical")
+      assert_class(flush, "logical")
 
       if (file.exists(file) & !overwrite) {
         stop("File already exists!")

From bc409542297a439415707ef0aa020c3634b020cd Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:14:46 +0100
Subject: [PATCH 07/11] [misc] update WORDLIST

---
 inst/WORDLIST | 1 -
 1 file changed, 1 deletion(-)

diff --git a/inst/WORDLIST b/inst/WORDLIST
index 9c5ed25e1..e2862fd54 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -407,7 +407,6 @@ veryHidden
 vm
 vml
 wb
-wbColor
 wbWorkbook
 wedgeEllipseCallout
 wedgeRectCallout

From e5cc58a5c789f91e75b13afe2aff921fb2763cbb Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:32:26 +0100
Subject: [PATCH 08/11] [test] fix tests for `flush = TRUE`

---
 src/write_file.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/write_file.cpp b/src/write_file.cpp
index 2f81d83ce..777982cda 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -103,6 +103,21 @@ void xml_sheet_data_slim(
         }
       }
 
+
+    if (
+      cc_c_s[i].empty() &&
+      cc_c_t[i].empty() &&
+      (!has_cm || (has_cm && cc_c_cm[i].empty())) &&
+      (!has_ph || (has_ph && cc_c_ph[i].empty())) &&
+      (!has_vm || (has_vm && cc_c_vm[i].empty())) &&
+      cc_v[i].empty() &&
+      cc_f[i].empty() &&
+      cc_f_attr[i].empty() &&
+      cc_is[i].empty()
+    ) {
+      continue;
+    }
+
       // create node <c>
       file << "<c";
 
@@ -140,7 +155,7 @@ void xml_sheet_data_slim(
       if (!to_string(cc_f[i]).empty() || !to_string(cc_f_attr[i]).empty()) {
         file << "<f";
         if (!to_string(cc_f_attr[i]).empty()) {
-          file << to_string(cc_f_attr[i]).c_str();
+          file << " " << to_string(cc_f_attr[i]).c_str();
         }
         file << ">";
 

From ae316b032070334294c5676f95958f301ad395ab Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:37:52 +0100
Subject: [PATCH 09/11] [save] clang-format

---
 src/write_file.cpp | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/write_file.cpp b/src/write_file.cpp
index 777982cda..af89faf58 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -36,10 +36,10 @@ void xml_sheet_data_slim(
 
   std::ofstream file(fl);
 
-  auto lastrow = 0; // integer value of the last row with column data
-  auto thisrow = 0; // integer value of the current row with column data
-  auto row_idx = 0; // the index of the row_attr file. this is != rowid
-  auto rowid   = 0; // integer value of the r field in row_attr
+  auto lastrow = 0;  // integer value of the last row with column data
+  auto thisrow = 0;  // integer value of the current row with column data
+  auto row_idx = 0;  // the index of the row_attr file. this is != rowid
+  auto rowid = 0;    // integer value of the r field in row_attr
 
   std::string xml_preserver = " ";
 
@@ -68,11 +68,9 @@ void xml_sheet_data_slim(
 
     file << "<sheetData>";
     for (auto i = 0; i < cc.nrow(); ++i) {
-
       thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));
 
       if (lastrow < thisrow) {
-
         // there might be entirely empty rows in between. this is the case for
         // loadExample. We check the rowid and write the line and skip until we
         // have every row and only then continue writing the column
@@ -87,7 +85,6 @@ void xml_sheet_data_slim(
           Rcpp::CharacterVector attrnams = row_attr.names();
 
           for (auto j = 0; j < row_attr.ncol(); ++j) {
-
             Rcpp::CharacterVector cv_s = "";
             cv_s = Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx];
 
@@ -96,27 +93,26 @@ void xml_sheet_data_slim(
               file << " " << attrnams[j] << "=\"" << val_strl.c_str() << "\"";
             }
           }
-          file << ">"; // end <r ...>
+          file << ">";  // end <r ...>
 
           // read the next row_idx when visiting again
           ++row_idx;
         }
       }
 
-
-    if (
-      cc_c_s[i].empty() &&
-      cc_c_t[i].empty() &&
-      (!has_cm || (has_cm && cc_c_cm[i].empty())) &&
-      (!has_ph || (has_ph && cc_c_ph[i].empty())) &&
-      (!has_vm || (has_vm && cc_c_vm[i].empty())) &&
-      cc_v[i].empty() &&
-      cc_f[i].empty() &&
-      cc_f_attr[i].empty() &&
-      cc_is[i].empty()
-    ) {
-      continue;
-    }
+      if (
+        cc_c_s[i].empty() &&
+        cc_c_t[i].empty() &&
+        (!has_cm || (has_cm && cc_c_cm[i].empty())) &&
+        (!has_ph || (has_ph && cc_c_ph[i].empty())) &&
+        (!has_vm || (has_vm && cc_c_vm[i].empty())) &&
+        cc_v[i].empty() &&
+        cc_f[i].empty() &&
+        cc_f_attr[i].empty() &&
+        cc_is[i].empty()
+      ) {
+        continue;
+      }
 
       // create node <c>
       file << "<c";
@@ -146,7 +142,7 @@ void xml_sheet_data_slim(
       if (has_vm && !to_string(cc_c_vm[i]).empty())
         file << " vm" << "=\"" << to_string(cc_c_vm[i]).c_str() << "\"";
 
-      file << ">"; // end <c ...>
+      file << ">";  // end <c ...>
 
       bool f_si = false;
 
@@ -195,12 +191,10 @@ void xml_sheet_data_slim(
     file << "<sheetData/>";
   }
 
-
   file << post;
   file << "</worksheet>";
 
   file.close();
-
 }
 
 // export worksheet without pugixml
@@ -231,7 +225,7 @@ void xml_sheet_data(pugi::xml_node &doc, Rcpp::DataFrame &row_attr, Rcpp::DataFr
   auto lastrow = 0;  // integer value of the last row with column data
   auto thisrow = 0;  // integer value of the current row with column data
   auto row_idx = 0;  // the index of the row_attr file. this is != rowid
-  auto rowid   = 0;  // integer value of the r field in row_attr
+  auto rowid = 0;    // integer value of the r field in row_attr
 
   pugi::xml_node row;
 

From f3dc635323ba82a0e99b0b94c3ef5f92d3a18ac5 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:48:34 +0100
Subject: [PATCH 10/11] [misc] update NEWS

---
 NEWS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 189875a6b..71a53fa0b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # openxlsx2 (development version)
 
+## New features
+
+* A new experimental `flush` argument has been introduced to `wb_save()`, allowing a custom XML streaming function for worksheets to help prevent memory spikes. This feature has only been tested within `openxlsx2` and not extensively with spreadsheet software. Since it bypasses certain failsafe mechanisms, including XML validity checks, it should only be used as a last-resort solution. [1255](https://github.com/JanMarvin/openxlsx2/pull/1255)
+
 ## Fixes
 
 * Input validation has been added to `fmt_txt()`, similar to how it has been added to the `create_*()` family a while ago.

From 0b8633e7353f57ee2999832039becda63d5d42bf Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Mon, 3 Mar 2025 13:55:51 +0100
Subject: [PATCH 11/11] [misc] final cleanup

---
 src/RcppExports.cpp         |  4 ++--
 src/write_file.cpp          |  8 ++++----
 tests/testthat/test-write.R | 16 +++++-----------
 3 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index eee07da48..1ff451036 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -867,14 +867,14 @@ BEGIN_RCPP
 END_RCPP
 }
 // write_worksheet
-XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment& sheet_data);
+XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data);
 RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
     Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
-    Rcpp::traits::input_parameter< Rcpp::Environment& >::type sheet_data(sheet_dataSEXP);
+    Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
     rcpp_result_gen = Rcpp::wrap(write_worksheet(prior, post, sheet_data));
     return rcpp_result_gen;
 END_RCPP
diff --git a/src/write_file.cpp b/src/write_file.cpp
index af89faf58..11c43ae51 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -35,11 +35,12 @@ void xml_sheet_data_slim(
   bool has_vm = cc.containsElementNamed("c_vm");
 
   std::ofstream file(fl);
+  if (!file.is_open()) Rcpp::stop("could not open output file");
 
   auto lastrow = 0;  // integer value of the last row with column data
   auto thisrow = 0;  // integer value of the current row with column data
   auto row_idx = 0;  // the index of the row_attr file. this is != rowid
-  auto rowid = 0;    // integer value of the r field in row_attr
+  auto rowid   = 0;  // integer value of the r field in row_attr
 
   std::string xml_preserver = " ";
 
@@ -65,7 +66,6 @@ void xml_sheet_data_slim(
 
     Rcpp::CharacterVector row_r     = row_attr["r"];
 
-
     file << "<sheetData>";
     for (auto i = 0; i < cc.nrow(); ++i) {
       thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));
@@ -225,7 +225,7 @@ void xml_sheet_data(pugi::xml_node &doc, Rcpp::DataFrame &row_attr, Rcpp::DataFr
   auto lastrow = 0;  // integer value of the last row with column data
   auto thisrow = 0;  // integer value of the current row with column data
   auto row_idx = 0;  // the index of the row_attr file. this is != rowid
-  auto rowid = 0;    // integer value of the r field in row_attr
+  auto rowid   = 0;  // integer value of the r field in row_attr
 
   pugi::xml_node row;
 
@@ -397,7 +397,7 @@ void xml_sheet_data(pugi::xml_node &doc, Rcpp::DataFrame &row_attr, Rcpp::DataFr
 // create single xml rows of sheet_data.
 //
 // [[Rcpp::export]]
-XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment &sheet_data) {
+XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data) {
   uint32_t pugi_parse_flags = pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_ws_pcdata | pugi::parse_eol;
 
   // sheet_data will be in order, just need to check for row_heights
diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R
index 8c94cc34e..aa60db992 100644
--- a/tests/testthat/test-write.R
+++ b/tests/testthat/test-write.R
@@ -1475,22 +1475,16 @@ test_that("guarding against overwriting shared formula reference works", {
 test_that("writing without pugixml works", {
 
   temp <- temp_xlsx()
-  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(write_xlsx(x = mtcars, file = temp, flush = TRUE))
   expect_silent(wb <- wb_load(temp))
 
-  temp <- temp_xlsx()
-  options("openxlsx2.export_with_pugi" = FALSE)
-  expect_silent(write_xlsx(x = mtcars, file = temp))
-  expect_silent(wb <- wb_load(temp))
+  wb <- wb_workbook()$add_worksheet()$add_data(x = mtcars)
 
-  temp <- temp_xlsx()
-  options("openxlsx2.export_with_pugi" = TRUE)
-  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb_save(wb = wb, file = temp, flush = TRUE))
   expect_silent(wb <- wb_load(temp))
+  wb <- wb_workbook()$add_worksheet()$add_data(x = mtcars)
 
-  temp <- temp_xlsx()
-  options("openxlsx2.export_with_pugi" = NULL)
-  expect_silent(write_xlsx(x = mtcars, file = temp))
+  expect_silent(wb$save(file = temp, flush = TRUE))
   expect_silent(wb <- wb_load(temp))
 
 })