Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental] Second attempt toying around with data.table #908

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Depends:
Imports:
R6,
Rcpp,
data.table,
grDevices,
magrittr,
stringi,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ importFrom(grDevices,tiff)
importFrom(magrittr,"%>%")
importFrom(stringi,stri_c)
importFrom(stringi,stri_encode)
importFrom(stringi,stri_extract_first)
importFrom(stringi,stri_isempty)
importFrom(stringi,stri_join)
importFrom(stringi,stri_match)
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ dims_to_df <- function(rows, cols, fill) {
.Call(`_openxlsx2_dims_to_df`, rows, cols, fill)
}

df_to_char <- function(df) {
.Call(`_openxlsx2_df_to_char`, df)
}

long_to_wide <- function(z, tt, zz) {
invisible(.Call(`_openxlsx2_long_to_wide`, z, tt, zz))
}
Expand Down
6 changes: 3 additions & 3 deletions R/class-comment.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ wbComment <- R6::R6Class(
for (i in seq_along(s)) {
styleShow <- c(
styleShow,
sprintf("Font name: %s\n", unname(unlist(xml_attr(s[[i]], "font", "name")))), ## Font name
sprintf("Font size: %s\n", unname(unlist(xml_attr(s[[i]], "font", "sz")))), ## Font size
sprintf("Font color: %s\n", gsub("^FF", "#", unname(unlist(xml_attr(s[[i]], "font", "color"))))), ## Font color
sprintf("Font name: %s\n", df_to_char(xml_attr(s[[i]], "font", "name"))), ## Font name
sprintf("Font size: %s\n", df_to_char(xml_attr(s[[i]], "font", "sz"))), ## Font size
sprintf("Font color: %s\n", gsub("^FF", "#", df_to_char(xml_attr(s[[i]], "font", "color")))), ## Font color
"\n\n"
)
}
Expand Down
18 changes: 9 additions & 9 deletions R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -2943,7 +2943,7 @@ wbWorkbook <- R6::R6Class(
to_dims_df_i <- dims_to_dataframe(to_dims, fill = FALSE)
to_dims_df_f <- dims_to_dataframe(to_dims, fill = TRUE)

to_dims_f <- unname(unlist(to_dims_df_f))
to_dims_f <- df_to_char(to_dims_df_f)

from_sheet <- wb_validate_sheet(self, from_sheet)
from_dims <- as.character(unlist(from_dims_df))
Expand Down Expand Up @@ -6830,7 +6830,7 @@ wbWorkbook <- R6::R6Class(
cols <- (seq_len(ncol(did)) %% every_nth_col) == 0
rows <- (seq_len(nrow(did)) %% every_nth_row) == 0

dims <- unname(unlist(did[rows, cols, drop = FALSE]))
dims <- df_to_char(did[rows, cols, drop = FALSE])

cc <- self$worksheets[[sheet]]$sheet_data$cc
cc <- cc[cc$r %in% dims, ]
Expand Down Expand Up @@ -6900,7 +6900,7 @@ wbWorkbook <- R6::R6Class(
private$do_cell_init(sheet, dims)

did <- dims_to_dataframe(dims, fill = TRUE)
dims <- unname(unlist(did))
dims <- df_to_char(did)

cc <- self$worksheets[[sheet]]$sheet_data$cc
cc <- cc[cc$r %in% dims, ]
Expand Down Expand Up @@ -6953,7 +6953,7 @@ wbWorkbook <- R6::R6Class(
private$do_cell_init(sheet, dims)

did <- dims_to_dataframe(dims, fill = TRUE)
dims <- unname(unlist(did))
dims <- df_to_char(did)

cc <- self$worksheets[[sheet]]$sheet_data$cc
cc <- cc[cc$r %in% dims, ]
Expand Down Expand Up @@ -7058,7 +7058,7 @@ wbWorkbook <- R6::R6Class(
private$do_cell_init(sheet, dims)

did <- dims_to_dataframe(dims, fill = TRUE)
dims <- unname(unlist(did))
dims <- df_to_char(did)

cc <- self$worksheets[[sheet]]$sheet_data$cc
cc <- cc[cc$r %in% dims, ]
Expand Down Expand Up @@ -7115,7 +7115,7 @@ wbWorkbook <- R6::R6Class(
temp <- self$clone()$.__enclos_env__$private$do_cell_init(sheet, dims)

# if a range is passed (e.g. "A1:B2") we need to get every cell
dims <- unname(unlist(dims))
dims <- df_to_char(dims)

# TODO check that cc$r is alway valid. not sure atm
sel <- temp$worksheets[[sheet]]$sheet_data$cc$r %in% dims
Expand All @@ -7139,7 +7139,7 @@ wbWorkbook <- R6::R6Class(
private$do_cell_init(sheet, dims)

# if a range is passed (e.g. "A1:B2") we need to get every cell
dims <- unname(unlist(dims))
dims <- df_to_char(dims)

sel <- self$worksheets[[sheet]]$sheet_data$cc$r %in% dims

Expand Down Expand Up @@ -7167,7 +7167,7 @@ wbWorkbook <- R6::R6Class(
rows <- as.integer(dims_to_rowcol(rows)[[2]])

dims <- wb_dims(rows, "A")
cells <- unname(unlist(dims_to_dataframe(dims, fill = TRUE)))
cells <- df_to_char(dims_to_dataframe(dims, fill = TRUE))
cc <- self$worksheets[[sheet]]$sheet_data$cc

cells <- cells[!cells %in% cc$r]
Expand Down Expand Up @@ -8454,7 +8454,7 @@ wbWorkbook <- R6::R6Class(
if (length(need_cells) == 1 && grepl(":|;", need_cells))
need_cells <- dims_to_dataframe(dims, fill = TRUE)

exp_cells <- unname(unlist(need_cells))
exp_cells <- df_to_char(need_cells)
got_cells <- self$worksheets[[sheet]]$sheet_data$cc$r

# initialize cell
Expand Down
12 changes: 6 additions & 6 deletions R/class-worksheet.R
Original file line number Diff line number Diff line change
Expand Up @@ -523,8 +523,8 @@ wbWorksheet <- R6::R6Class(
# regmatch0 will return character(0) when x is NULL
if (length(current)) {

new_merge <- unname(unlist(dims_to_dataframe(sqref, fill = TRUE)))
current_cells <- lapply(current, function(x) unname(unlist(dims_to_dataframe(x, fill = TRUE))))
new_merge <- df_to_char(dims_to_dataframe(sqref, fill = TRUE))
current_cells <- lapply(current, function(x) df_to_char(dims_to_dataframe(x, fill = TRUE)))
intersects <- vapply(current_cells, function(x) any(x %in% new_merge), NA)

# Error if merge intersects
Expand Down Expand Up @@ -572,8 +572,8 @@ wbWorksheet <- R6::R6Class(
current <- rbindlist(xml_attr(xml = self$mergeCells, "mergeCell"))$ref

if (!is.null(current)) {
new_merge <- unname(unlist(dims_to_dataframe(sqref, fill = TRUE)))
current_cells <- lapply(current, function(x) unname(unlist(dims_to_dataframe(x, fill = TRUE))))
new_merge <- df_to_char(dims_to_dataframe(sqref, fill = TRUE))
current_cells <- lapply(current, function(x) df_to_char(dims_to_dataframe(x, fill = TRUE)))
intersects <- vapply(current_cells, function(x) any(x %in% new_merge), NA)

# Remove intersection
Expand Down Expand Up @@ -603,7 +603,7 @@ wbWorksheet <- R6::R6Class(
rows <- rownames(ddims)
cols <- colnames(ddims)

dims <- unname(unlist(ddims))
dims <- df_to_char(ddims)
sel <- cc$r %in% dims
}

Expand Down Expand Up @@ -817,7 +817,7 @@ wbWorksheet <- R6::R6Class(
unlockedFormula = FALSE
) {

dims <- unname(unlist(dims_to_dataframe(dims, fill = TRUE)))
dims <- df_to_char(dims_to_dataframe(dims, fill = TRUE))

iEs <- self$ignoredErrors
if (xml_node_name(iEs) == "ignoredErrors") {
Expand Down
16 changes: 16 additions & 0 deletions R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1660,3 +1660,19 @@ clone_shared_strings <- function(wb_old, old, wb_new, new) {
# print(sprintf("cloned: %s", length(new_ids)))

}

names_to_df <- function(x) {
structure(
lapply(x, c),
names = x,
class = "data.frame",
row.names = c(NA, 1)
)
}

dt_rbind <- function(x, y) {
if (is.character(y)) {
y <- names_to_df(y)
}
as.data.frame(data.table::rbindlist(list(x, y)))
}
6 changes: 3 additions & 3 deletions R/openxlsx2-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
#' @import R6
#' @importFrom grDevices bmp col2rgb colors dev.copy dev.list dev.off jpeg png rgb tiff
#' @importFrom magrittr %>%
#' @importFrom stringi stri_c stri_encode stri_isempty stri_join stri_match
#' stri_match_all_regex stri_order stri_opts_collator stri_pad_left
#' stri_rand_strings stri_read_lines stri_replace_all_fixed
#' @importFrom stringi stri_c stri_encode stri_extract_first stri_isempty
#' stri_join stri_match stri_match_all_regex stri_order stri_opts_collator
#' stri_pad_left stri_rand_strings stri_read_lines stri_replace_all_fixed
#' stri_split_fixed stri_split_regex stri_sub stri_unescape_unicode
#' stri_unique
#' @importFrom utils download.file head menu read.csv unzip
Expand Down
2 changes: 1 addition & 1 deletion R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ wb_data <- function(wb, sheet = current_sheet(), dims, ...) {
sheetname <- wb$get_sheet_names(escape = TRUE)[[sheetno]]

if (missing(dims)) {
dims <- unname(unlist(xml_attr(wb$worksheets[[sheetno]]$dimension, "dimension")))
dims <- df_to_char(xml_attr(wb$worksheets[[sheetno]]$dimension, "dimension"))
}

z <- wb_to_df(wb, sheet, dims = dims, ...)
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ get_relship_id <- function(obj, x) {
relship <- rbindlist(xml_attr(obj, "Relationship"))
relship$typ <- basename(relship$Type)
relship <- relship[relship$typ == x, ]
unname(unlist(relship[c("Id")]))
df_to_char(relship[c("Id")])
}

#' filename_id returns an integer vector with the file name as name
Expand Down
12 changes: 6 additions & 6 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ inner_update <- function(
row_attr_missing <- empty_row_attr(n = length(missing_rows))
row_attr_missing$r <- missing_rows

row_attr <- rbind(row_attr, row_attr_missing)
row_attr <- dt_rbind(row_attr, row_attr_missing)

# order
row_attr <- row_attr[order(as.numeric(row_attr$r)), ]
Expand All @@ -62,11 +62,11 @@ inner_update <- function(
# create missing cells
cc_missing <- create_char_dataframe(names(cc), length(missing_cells))
cc_missing$r <- missing_cells
cc_missing$row_r <- gsub("[[:upper:]]", "", cc_missing$r)
cc_missing$c_r <- gsub("[[:digit:]]", "", cc_missing$r)
cc_missing$row_r <- stringi::stri_extract_first(missing_cells, regex = "[0-9]+")
cc_missing$c_r <- stringi::stri_extract_first(missing_cells, regex = "[A-Z]+")

# assign to cc
cc <- rbind(cc, cc_missing)
cc <- dt_rbind(cc, cc_missing)

# order cc (not really necessary, will be done when saving)
cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
Expand Down Expand Up @@ -159,7 +159,7 @@ update_cell <- function(x, wb, sheet, cell, colNames = FALSE,
dims <- dims_to_dataframe(cell, fill = TRUE)
rows <- rownames(dims)

cells_needed <- unname(unlist(dims))
cells_needed <- df_to_char(dims)

inner_update(wb, sheet_id, x, rows, cells_needed, colNames, removeCellStyle, na.strings)
}
Expand Down Expand Up @@ -278,7 +278,7 @@ write_data2 <- function(
# its quicker to convert data to character and append the colnames
# then to create a data frame from colnames, construct the required
# length and copy the converted to character data into it.
data <- rbind(data, colnames(data))
data <- dt_rbind(data, colnames(data))
out <- c(nrow(data), seq_len(nrow(data))[-nrow(data)])
data <- data[out, , drop = FALSE]
}
Expand Down
12 changes: 12 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// df_to_char
SEXP df_to_char(Rcpp::DataFrame df);
RcppExport SEXP _openxlsx2_df_to_char(SEXP dfSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::DataFrame >::type df(dfSEXP);
rcpp_result_gen = Rcpp::wrap(df_to_char(df));
return rcpp_result_gen;
END_RCPP
}
// long_to_wide
void long_to_wide(Rcpp::DataFrame z, Rcpp::DataFrame tt, Rcpp::DataFrame zz);
RcppExport SEXP _openxlsx2_long_to_wide(SEXP zSEXP, SEXP ttSEXP, SEXP zzSEXP) {
Expand Down Expand Up @@ -952,6 +963,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_rbindlist", (DL_FUNC) &_openxlsx2_rbindlist, 1},
{"_openxlsx2_copy", (DL_FUNC) &_openxlsx2_copy, 1},
{"_openxlsx2_dims_to_df", (DL_FUNC) &_openxlsx2_dims_to_df, 3},
{"_openxlsx2_df_to_char", (DL_FUNC) &_openxlsx2_df_to_char, 1},
{"_openxlsx2_long_to_wide", (DL_FUNC) &_openxlsx2_long_to_wide, 3},
{"_openxlsx2_is_charnum", (DL_FUNC) &_openxlsx2_is_charnum, 1},
{"_openxlsx2_wide_to_long", (DL_FUNC) &_openxlsx2_wide_to_long, 13},
Expand Down
43 changes: 30 additions & 13 deletions src/helper_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,26 @@ SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, bool fill)
return df;
}

// [[Rcpp::export]]
SEXP df_to_char(Rcpp::DataFrame df) {

Rcpp::CharacterVector out(df.ncol() * df.nrow());

if (out.size() == 0) return R_NilValue;

auto pos = 0;
for (size_t i = 0; i < df.ncol(); ++i) {
Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(df[i]);
for (size_t j = 0; j < df.nrow(); ++j) {
out[pos] = cvec[j];
++pos;
}
}

return Rcpp::wrap(out);
}


// similar to dcast converts cc dataframe to z dataframe
// [[Rcpp::export]]
void long_to_wide(Rcpp::DataFrame z, Rcpp::DataFrame tt, Rcpp::DataFrame zz) {
Expand Down Expand Up @@ -343,10 +363,10 @@ void wide_to_long(
std::string c_cm
) {

auto n = z.nrow();
auto m = z.ncol();
int32_t n = z.nrow();
int32_t m = z.ncol();

auto startcol = start_col;
int32_t startcol = start_col;

// pointer magic. even though these are extracted, they just point to the
// memory in the data frame
Expand All @@ -367,18 +387,19 @@ void wide_to_long(
else
na_strings = txt_to_si(na_strings, 0, 1, 1);

for (auto i = 0; i < m; ++i) {
for (int32_t i = 0; i < m; ++i) {
Rcpp::checkUserInterrupt();

Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(z[i]);

std::string col = int_to_col(startcol);
int8_t vtyp_i = (int8_t)vtyps[i];

auto startrow = start_row;
for (auto j = 0; j < n; ++j) {
int32_t startrow = start_row;
for (int32_t j = 0; j < n; ++j) {
Rcpp::checkUserInterrupt();

int8_t vtyp = (int8_t)vtyps[i];
int8_t vtyp = vtyp_i;
// if colname is provided, the first row is always a character
if (ColNames & (j == 0)) vtyp = character;
std::string vals = Rcpp::as<std::string>(cvec[j]);
Expand Down Expand Up @@ -479,14 +500,10 @@ void wide_to_long(

}
}
}

if (cell.v.compare("NaN") == 0) {
} else if (cell.v.compare("NaN") == 0) {
cell.v = "#VALUE!";
cell.c_t = "e";
}

if (cell.v.compare("-Inf") == 0 || cell.v.compare("Inf") == 0) {
} else if (cell.v.compare("-Inf") == 0 || cell.v.compare("Inf") == 0) {
cell.v = "#NUM!";
cell.c_t = "e";
}
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,13 @@ test_that("is_double works", {
expect_equal(exp, got)

})

test_that("df_to_char works", {

dims <- dims_to_df(rows = 1:1000, cols = int2col(1:1000), fill = TRUE)

x <- unname(unlist(dims))
y <- df_to_char(dims)
expect_equal(x, y)

})
Loading