Skip to content

Commit 6e430fd

Browse files
committed
[writing] provide openxlsx2.export_with_pugi = FALSE option
1 parent ec3bd2e commit 6e430fd

File tree

5 files changed

+276
-27
lines changed

5 files changed

+276
-27
lines changed

R/RcppExports.R

+4
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,10 @@ set_sst <- function(sharedStrings) {
340340
.Call(`_openxlsx2_set_sst`, sharedStrings)
341341
}
342342

343+
write_worksheet_slim <- function(sheet_data, prior, post, fl) {
344+
invisible(.Call(`_openxlsx2_write_worksheet_slim`, sheet_data, prior, post, fl))
345+
}
346+
343347
write_worksheet <- function(prior, post, sheet_data) {
344348
.Call(`_openxlsx2_write_worksheet`, prior, post, sheet_data)
345349
}

R/class-workbook.R

+50-27
Original file line numberDiff line numberDiff line change
@@ -9788,41 +9788,64 @@ wbWorkbook <- R6::R6Class(
97889788
prior <- self$worksheets[[i]]$get_prior_sheet_data()
97899789
post <- self$worksheets[[i]]$get_post_sheet_data()
97909790

9791-
if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
9791+
use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)
97929792

9793-
self$worksheets[[i]]$sheet_data$cc$r <- with(
9794-
self$worksheets[[i]]$sheet_data$cc,
9795-
stringi::stri_join(c_r, row_r)
9796-
)
9797-
cc <- self$worksheets[[i]]$sheet_data$cc
9798-
# prepare data for output
9793+
if (use_pugixml_export) {
9794+
# failsaves. check that all rows and cells
9795+
# are available and in the correct order
9796+
if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
9797+
9798+
self$worksheets[[i]]$sheet_data$cc$r <- with(
9799+
self$worksheets[[i]]$sheet_data$cc,
9800+
stringi::stri_join(c_r, row_r)
9801+
)
9802+
cc <- self$worksheets[[i]]$sheet_data$cc
9803+
# prepare data for output
97999804

9800-
# there can be files, where row_attr is incomplete because a row
9801-
# is lacking any attributes (presumably was added before saving)
9802-
# still row_attr is what we want!
9805+
# there can be files, where row_attr is incomplete because a row
9806+
# is lacking any attributes (presumably was added before saving)
9807+
# still row_attr is what we want!
98039808

9804-
rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
9805-
self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
9809+
rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
9810+
self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
98069811

9807-
cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
9808-
# c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
9809-
cc <- cc[cc$row_r %in% cc_rows, ]
9812+
cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
9813+
# c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
9814+
cc <- cc[cc$row_r %in% cc_rows, ]
98109815

9811-
self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
9812-
} else {
9813-
self$worksheets[[i]]$sheet_data$row_attr <- NULL
9814-
self$worksheets[[i]]$sheet_data$cc <- NULL
9816+
self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
9817+
rm(cc)
9818+
} else {
9819+
self$worksheets[[i]]$sheet_data$row_attr <- NULL
9820+
self$worksheets[[i]]$sheet_data$cc <- NULL
9821+
}
98159822
}
98169823

9817-
# create entire sheet prior to writing it
9818-
sheet_xml <- write_worksheet(
9819-
prior = prior,
9820-
post = post,
9821-
sheet_data = self$worksheets[[i]]$sheet_data
9822-
)
98239824
ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
9824-
write_xmlPtr(doc = sheet_xml, fl = ws_file)
9825-
rm(sheet_xml)
9825+
9826+
if (use_pugixml_export) {
9827+
9828+
# create entire sheet prior to writing it
9829+
sheet_xml <- write_worksheet(
9830+
prior = prior,
9831+
post = post,
9832+
sheet_data = self$worksheets[[i]]$sheet_data
9833+
)
9834+
write_xmlPtr(doc = sheet_xml, fl = ws_file)
9835+
9836+
} else {
9837+
9838+
if (grepl("</worksheet>", prior))
9839+
prior <- substr(prior, 1, nchar(prior) - 13) # remove " </worksheet>"
9840+
9841+
write_worksheet_slim(
9842+
sheet_data = self$worksheets[[i]]$sheet_data,
9843+
prior = prior,
9844+
post = post,
9845+
fl = ws_file
9846+
)
9847+
9848+
}
98269849

98279850
## write worksheet rels
98289851
if (length(self$worksheets_rels[[i]])) {

src/RcppExports.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,19 @@ BEGIN_RCPP
853853
return rcpp_result_gen;
854854
END_RCPP
855855
}
856+
// write_worksheet_slim
857+
void write_worksheet_slim(Rcpp::Environment sheet_data, std::string prior, std::string post, std::string fl);
858+
RcppExport SEXP _openxlsx2_write_worksheet_slim(SEXP sheet_dataSEXP, SEXP priorSEXP, SEXP postSEXP, SEXP flSEXP) {
859+
BEGIN_RCPP
860+
Rcpp::RNGScope rcpp_rngScope_gen;
861+
Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
862+
Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
863+
Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
864+
Rcpp::traits::input_parameter< std::string >::type fl(flSEXP);
865+
write_worksheet_slim(sheet_data, prior, post, fl);
866+
return R_NilValue;
867+
END_RCPP
868+
}
856869
// write_worksheet
857870
XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment& sheet_data);
858871
RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
@@ -1039,6 +1052,7 @@ static const R_CallMethodDef CallEntries[] = {
10391052
{"_openxlsx2_read_colors", (DL_FUNC) &_openxlsx2_read_colors, 1},
10401053
{"_openxlsx2_write_colors", (DL_FUNC) &_openxlsx2_write_colors, 1},
10411054
{"_openxlsx2_set_sst", (DL_FUNC) &_openxlsx2_set_sst, 1},
1055+
{"_openxlsx2_write_worksheet_slim", (DL_FUNC) &_openxlsx2_write_worksheet_slim, 4},
10421056
{"_openxlsx2_write_worksheet", (DL_FUNC) &_openxlsx2_write_worksheet, 3},
10431057
{"_openxlsx2_write_xmlPtr", (DL_FUNC) &_openxlsx2_write_xmlPtr, 2},
10441058
{"_openxlsx2_styles_bin", (DL_FUNC) &_openxlsx2_styles_bin, 3},

src/write_file.cpp

+186
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,192 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
2020
return sst;
2121
}
2222

23+
// write xml by streaming to files. this takes whatever input we provide and
24+
// dumps it into the file. no xml checking, no unicode checking
25+
void xml_sheet_data_slim(
26+
Rcpp::DataFrame row_attr,
27+
Rcpp::DataFrame cc,
28+
std::string prior,
29+
std::string post,
30+
std::string fl
31+
) {
32+
33+
bool has_cm = cc.containsElementNamed("c_cm");
34+
bool has_ph = cc.containsElementNamed("c_ph");
35+
bool has_vm = cc.containsElementNamed("c_vm");
36+
37+
std::ofstream file(fl);
38+
39+
auto lastrow = 0; // integer value of the last row with column data
40+
auto thisrow = 0; // integer value of the current row with column data
41+
auto row_idx = 0; // the index of the row_attr file. this is != rowid
42+
auto rowid = 0; // integer value of the r field in row_attr
43+
44+
std::string xml_preserver = " ";
45+
46+
file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
47+
file << prior;
48+
49+
Rcpp::CharacterVector cc_c_cm, cc_c_ph, cc_c_vm;
50+
51+
if (cc.nrow() && cc.ncol()) {
52+
// we cannot access rows directly in the dataframe.
53+
// Have to extract the columns and use these
54+
Rcpp::CharacterVector cc_row_r = cc["row_r"]; // 1
55+
Rcpp::CharacterVector cc_r = cc["r"]; // A1
56+
Rcpp::CharacterVector cc_v = cc["v"];
57+
Rcpp::CharacterVector cc_c_t = cc["c_t"];
58+
Rcpp::CharacterVector cc_c_s = cc["c_s"];
59+
if (has_cm) cc_c_cm = cc["c_cm"];
60+
if (has_ph) cc_c_ph = cc["c_ph"];
61+
if (has_vm) cc_c_vm = cc["c_vm"];
62+
Rcpp::CharacterVector cc_f = cc["f"];
63+
Rcpp::CharacterVector cc_f_attr = cc["f_attr"];
64+
Rcpp::CharacterVector cc_is = cc["is"];
65+
66+
Rcpp::CharacterVector row_r = row_attr["r"];
67+
68+
69+
file << "<sheetData>";
70+
for (auto i = 0; i < cc.nrow(); ++i) {
71+
72+
thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));
73+
74+
if (lastrow < thisrow) {
75+
76+
// there might be entirely empty rows in between. this is the case for
77+
// loadExample. We check the rowid and write the line and skip until we
78+
// have every row and only then continue writing the column
79+
while (rowid < thisrow) {
80+
81+
rowid = std::stoi(Rcpp::as<std::string>(
82+
row_r[row_idx]
83+
));
84+
85+
if (row_idx) file << "</row>";
86+
file << "<row";
87+
Rcpp::CharacterVector attrnams = row_attr.names();
88+
89+
for (auto j = 0; j < row_attr.ncol(); ++j) {
90+
91+
Rcpp::CharacterVector cv_s = "";
92+
cv_s = Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx];
93+
94+
if (cv_s[0] != "") {
95+
const std::string val_strl = Rcpp::as<std::string>(cv_s);
96+
file << " " << attrnams[j] << "=\"" << val_strl.c_str() << "\"";
97+
}
98+
}
99+
file << ">"; // end <r ...>
100+
101+
// read the next row_idx when visiting again
102+
++row_idx;
103+
}
104+
}
105+
106+
// create node <c>
107+
file << "<c";
108+
109+
// Every cell consists of a typ and a val list. Certain functions have an
110+
// additional attr list.
111+
112+
// append attributes <c r="A1" ...>
113+
file << " r" << "=\"" << to_string(cc_r[i]).c_str() << "\"";
114+
115+
if (!to_string(cc_c_s[i]).empty())
116+
file << " s" << "=\"" << to_string(cc_c_s[i]).c_str() << "\"";
117+
118+
// assign type if not <v> aka numeric
119+
if (!to_string(cc_c_t[i]).empty())
120+
file << " t" << "=\"" << to_string(cc_c_t[i]).c_str() << "\"";
121+
122+
// CellMetaIndex: suppress curly brackets in spreadsheet software
123+
if (has_cm && !to_string(cc_c_cm[i]).empty())
124+
file << " cm" << "=\"" << to_string(cc_c_cm[i]).c_str() << "\"";
125+
126+
// phonetics spelling
127+
if (has_ph && !to_string(cc_c_ph[i]).empty())
128+
file << " ph" << "=\"" << to_string(cc_c_ph[i]).c_str() << "\"";
129+
130+
// suppress curly brackets in spreadsheet software
131+
if (has_vm && !to_string(cc_c_vm[i]).empty())
132+
file << " vm" << "=\"" << to_string(cc_c_vm[i]).c_str() << "\"";
133+
134+
file << ">"; // end <c ...>
135+
136+
bool f_si = false;
137+
138+
// <f> ... </f>
139+
// f node: formula to be evaluated
140+
if (!to_string(cc_f[i]).empty() || !to_string(cc_f_attr[i]).empty()) {
141+
file << "<f";
142+
if (!to_string(cc_f_attr[i]).empty()) {
143+
file << to_string(cc_f_attr[i]).c_str();
144+
}
145+
file << ">";
146+
147+
file << to_string(cc_f[i]).c_str();
148+
149+
file << "</f>";
150+
}
151+
152+
// v node: value stored from evaluated formula
153+
if (!to_string(cc_v[i]).empty()) {
154+
if (!f_si & (to_string(cc_v[i]).compare(xml_preserver.c_str()) == 0)) {
155+
// this looks strange
156+
file << "<v xml:space=\"preserve\">";
157+
file << " ";
158+
file << "</v>";
159+
} else {
160+
file << "<v>" << to_string(cc_v[i]).c_str() << "</v>";
161+
}
162+
}
163+
164+
// <is><t> ... </t></is>
165+
if (to_string(cc_c_t[i]).compare("inlineStr") == 0) {
166+
if (!to_string(cc_is[i]).empty()) {
167+
file << to_string(cc_is[i]).c_str();
168+
}
169+
}
170+
171+
file << "</c>";
172+
173+
// update lastrow
174+
lastrow = thisrow;
175+
}
176+
177+
file << "</row>";
178+
file << "</sheetData>";
179+
} else {
180+
file << "<sheetData/>";
181+
}
182+
183+
184+
file << post;
185+
file << "</worksheet>";
186+
187+
file.close();
188+
189+
}
190+
191+
// export worksheet without pugixml
192+
// this should be way quicker, uses far less memory, but also skips all of the checks pugi does
193+
//
194+
// [[Rcpp::export]]
195+
void write_worksheet_slim(
196+
Rcpp::Environment sheet_data,
197+
std::string prior,
198+
std::string post,
199+
std::string fl
200+
){
201+
// sheet_data will be in order, just need to check for row_heights
202+
// CharacterVector cell_col = int_to_col(sheet_data.field("cols"));
203+
Rcpp::DataFrame row_attr = Rcpp::as<Rcpp::DataFrame>(sheet_data["row_attr"]);
204+
Rcpp::DataFrame cc = Rcpp::as<Rcpp::DataFrame>(sheet_data["cc"]);
205+
206+
xml_sheet_data_slim(row_attr, cc, prior, post, fl);
207+
}
208+
23209
// creates an xml row
24210
// data in xml is ordered row wise. therefore we need the row attributes and
25211
// the column data used in this row. This function uses both to create a single

tests/testthat/test-write.R

+22
Original file line numberDiff line numberDiff line change
@@ -1470,5 +1470,27 @@ test_that("guarding against overwriting shared formula reference works", {
14701470
exp <- c("1", "2", "B1 + 1", "C1 + 1")
14711471
got <- unname(unlist(wb$to_df(show_formula = TRUE, col_names = FALSE)))
14721472
expect_equal(exp, got)
1473+
})
1474+
1475+
test_that("writing without pugixml works", {
1476+
1477+
temp <- temp_xlsx()
1478+
expect_silent(write_xlsx(x = mtcars, file = temp))
1479+
expect_silent(wb <- wb_load(temp))
1480+
1481+
temp <- temp_xlsx()
1482+
options("openxlsx2.export_with_pugi" = FALSE)
1483+
expect_silent(write_xlsx(x = mtcars, file = temp))
1484+
expect_silent(wb <- wb_load(temp))
1485+
1486+
temp <- temp_xlsx()
1487+
options("openxlsx2.export_with_pugi" = TRUE)
1488+
expect_silent(write_xlsx(x = mtcars, file = temp))
1489+
expect_silent(wb <- wb_load(temp))
1490+
1491+
temp <- temp_xlsx()
1492+
options("openxlsx2.export_with_pugi" = NULL)
1493+
expect_silent(write_xlsx(x = mtcars, file = temp))
1494+
expect_silent(wb <- wb_load(temp))
14731495

14741496
})

0 commit comments

Comments
 (0)