diff --git a/R/class-workbook.R b/R/class-workbook.R index 4377dcf4f..0fcee4e27 100644 --- a/R/class-workbook.R +++ b/R/class-workbook.R @@ -3309,19 +3309,22 @@ wbWorkbook <- R6::R6Class( }, FUN.VALUE = ""), collapse = "") if (WR != "") { WR <- rbindlist(xml_attr(WR, "Relationships", "Relationship")) - WR$tmpDirPartName <- paste0(tmpDir, "/xl/", folder, "/", WR$Target) - WR$fileExists <- file.exists(WR$tmpDirPartName) - - # exclude hyperlinks - WR$type <- basename(WR$Type) - WR <- WR[WR$type != "hyperlink", ] - - if (!all(WR$fileExists)) { - missing_in_tmp <- WR$Target[!WR$fileExists] - warning( - "[", folder, "] file expected to be in output is missing: ", - paste(missing_in_tmp, collapse = " ") - ) + + if (NROW(WR)) { # in xlsb files it can be that WR has no rows + WR$tmpDirPartName <- paste0(tmpDir, "/xl/", folder, "/", WR$Target) + WR$fileExists <- file.exists(WR$tmpDirPartName) + + # exclude hyperlinks + WR$type <- basename(WR$Type) + WR <- WR[WR$type != "hyperlink", ] + + if (!all(WR$fileExists)) { + missing_in_tmp <- WR$Target[!WR$fileExists] + warning( + "[", folder, "] file expected to be in output is missing: ", + paste(missing_in_tmp, collapse = " ") + ) + } } } } diff --git a/src/xlsb.cpp b/src/xlsb.cpp index 2c9ce1fc7..5090b8788 100644 --- a/src/xlsb.cpp +++ b/src/xlsb.cpp @@ -628,6 +628,7 @@ int table_bin(std::string filePath, std::string outPath, bool debug) { if (bin) { bin.seekg(0, std::ios_base::beg); bool end_of_table = false; + bool has_revision_record = false; while(!end_of_table) { Rcpp::checkUserInterrupt(); @@ -893,7 +894,7 @@ int table_bin(std::string filePath, std::string outPath, bool debug) { std::string fml; flags = readbin(flags, bin, swapit); int sharedFormula = false; - fml = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + fml = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); // need to write this formula somehwere if (debug )Rcpp::Rcout << fml << std::endl; @@ -921,6 +922,18 @@ int table_bin(std::string filePath, std::string outPath, bool debug) { break; } + case BrtRRChgCell: + case BrtRRDefName: + { + has_revision_record = true; + // -- have not seen this yet. if it appears, treat it as if a revision record was found -- + // rgce.rgce or rgceOld.rgce in BrtRRDefName + if (debug) Rcpp::Rcout << "BrtRRChgCell or BrtRRDefName" << std::endl; + Rcpp::warning("Assuming revision record."); + bin.seekg(size, bin.cur); + break; + } + default: { if (debug) { @@ -1395,6 +1408,7 @@ int workbook_bin(std::string filePath, std::string outPath, bool debug) { bin.seekg(0, std::ios_base::beg); bool end_of_workbook = false; bool first_extern_sheet = true; + bool has_revision_record = false; std::vector defNams, xtis, reference_type; defNams.push_back(""); @@ -1654,7 +1668,7 @@ int workbook_bin(std::string filePath, std::string outPath, bool debug) { std::string fml = "", comment = ""; int sharedFormula = false; - fml = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + fml = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); comment = XLNullableWideString(bin, swapit); @@ -1920,6 +1934,18 @@ int workbook_bin(std::string filePath, std::string outPath, bool debug) { break; } + case BrtRRChgCell: + case BrtRRDefName: + { + has_revision_record = true; + // -- have not seen this yet. if it appears, treat it as if a revision record was found -- + // rgce.rgce or rgceOld.rgce in BrtRRDefName + if (debug) Rcpp::Rcout << "BrtRRChgCell or BrtRRDefName" << std::endl; + Rcpp::warning("Assuming revision record."); + bin.seekg(size, bin.cur); + break; + } + default: { if (debug) { @@ -1961,6 +1987,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo bool first_row = true; bool in_sheet_data = false; bool end_of_worksheet = false; + bool has_revision_record = false; std::string fml_type; uint32_t row = 0; @@ -2810,7 +2837,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo grbitFlags = readbin(grbitFlags, bin, swapit); // GrbitFmlaFields *fields = (GrbitFmlaFields *)&grbitFlags; - std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula, has_revision_record); @@ -2858,7 +2885,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo // int32_t len = size - 4 * 32 - 2 * 8; // std::string fml(len, '\0'); - std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula, has_revision_record); xml_col column; column.v = fErr; @@ -2906,7 +2933,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo // fields->fAlwaysCalc, // fields->unused); - std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula, has_revision_record); std::stringstream stream; stream << std::setprecision(16) << xnum; @@ -2957,7 +2984,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo grbitFlags = readbin(grbitFlags, bin, swapit); - std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, 0, row, is_shared_formula, has_revision_record); // if (is_shared_formula) { // Rcpp::Rcout << fml << std::endl; @@ -3011,7 +3038,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo flags = readbin(flags, bin, 0); - std::string fml = CellParsedFormula(bin, swapit, debug, col, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, col, row, is_shared_formula, has_revision_record); if (debug) Rcpp::Rcout << "BrtArrFmla: " << fml << std::endl; // add to the last colvec element @@ -3056,7 +3083,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo if (debug) Rcpp::Rcout << "ref: " << ref << std::endl; - std::string fml = CellParsedFormula(bin, swapit, debug, col, row, is_shared_formula); + std::string fml = CellParsedFormula(bin, swapit, debug, col, row, is_shared_formula, has_revision_record); if (debug) Rcpp::Rcout << "BrtShrFmla: " << fml << std::endl; fml_type = "shared"; @@ -3414,7 +3441,7 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo std::string strProgID = XLNullableWideString(bin, swapit); int sharedFormula = false; - if (fLinked) std::string link = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + if (fLinked) std::string link = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); std::string stRelID = XLNullableWideString(bin, swapit); out << "" << std::endl; @@ -3507,13 +3534,13 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo int sharedFormula = false; std::string rgce1, rgce2, rgce3; if (cbFmla1 != 0x00000000) { - rgce1 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + rgce1 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); } if (cbFmla2 != 0x00000000) { - rgce2 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + rgce2 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); } if (cbFmla3 != 0x00000000) { - rgce3 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula); + rgce3 = CellParsedFormula(bin, swapit, debug, 0, 0, sharedFormula, has_revision_record); } BrtBeginCFRuleFields *fields = (BrtBeginCFRuleFields *)&flags; @@ -3644,6 +3671,18 @@ int worksheet_bin(std::string filePath, bool chartsheet, std::string outPath, bo break; } + case BrtRRChgCell: + case BrtRRDefName: + { + has_revision_record = true; + // -- have not seen this yet. if it appears, treat it as if a revision record was found -- + // rgce.rgce or rgceOld.rgce in BrtRRDefName + if (debug) Rcpp::Rcout << "BrtRRChgCell or BrtRRDefName" << std::endl; + Rcpp::warning("Assuming revision record."); + bin.seekg(size, bin.cur); + break; + } + default: { // if (debug) { diff --git a/src/xlsb_defines.h b/src/xlsb_defines.h index 1e60a4ada..6581e895e 100644 --- a/src/xlsb_defines.h +++ b/src/xlsb_defines.h @@ -179,6 +179,19 @@ typedef struct { uint16_t unused : 13; } BrtWbPropFields; + +typedef struct { + uint8_t columns : 2; + uint8_t rowType : 5; + bool squareBracketSpace : 1; + bool commaSpace : 1; + bool unused : 1; + uint8_t type : 2; + bool invalid : 1; + bool nonresident : 1; + uint8_t reserved2 : 2; +} PtgListFields; + enum RgbExtra { PtgExtraArray = 0, diff --git a/src/xlsb_funs.h b/src/xlsb_funs.h index c1a7f291c..08ad0354a 100644 --- a/src/xlsb_funs.h +++ b/src/xlsb_funs.h @@ -922,6 +922,29 @@ std::vector Xti(std::istream& sas, bool swapit) { // token == "#" || token == "@"; // } + +std::string array_elements(const std::vector& elements, int n, int k) { + std::stringstream ss; + ss << "{"; + for (int i = 0; i < n; ++i) { + if (i > 0) ss << ";"; + for (int j = 0; j < k; ++j) { + if (j > 0) ss << ","; + int index = i * k + j; + if (index < elements.size()) { + // check if it needs escaping + if (elements[index][0] == '"') ss << "\""; + ss << "\""; + ss << elements[index]; + if (elements[index][0] == '"') ss << "\""; + ss << "\""; + } + } + } + ss << "}"; + return ss.str(); +} + #include std::string parseRPN(const std::string& expression) { @@ -990,7 +1013,7 @@ std::string parseRPN(const std::string& expression) { } -std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int col, int row, int &sharedFml) { +std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int col, int row, int &sharedFml, bool has_revision_record) { // bool ptg_extra_array = false; uint32_t cce= 0, cb= 0; @@ -1039,7 +1062,7 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co case PtgList: { RgbExtra typ = PtgExtraList; - ptgextra.push_back(typ); + if (debug) Rcpp::Rcout << "PtgList " << sas.tellg() << std::endl; uint16_t ixti = 0, flags = 0; uint32_t listIndex = 0; @@ -1052,6 +1075,12 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co colFirst = ColShort(sas, swapit); colLast = ColShort(sas, swapit); + + PtgListFields *fields = (PtgListFields *)&flags; + + if (fields->nonresident) // different workbook and invalid == 0 + ptgextra.push_back(typ); + std::stringstream paddedStr; paddedStr << std::setw(12) << std::setfill('0') << ixti; @@ -1411,7 +1440,8 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co if (debug) Rcpp::Rcout << "PtgRef3d" < 0 && ptgextra.size() > cntr) { - if (ptgextra[cntr] == PtgExtraArray) { - if (debug) Rcpp::Rcout << "need PtgArray" << std::endl; - val1 = PtgArray; - } else if (ptgextra[cntr] == PtgExtraCol) { - if (debug) Rcpp::Rcout << "need PtgExp" << std::endl; - val1 = PtgExp; - } else if (ptgextra[cntr] == RevExtern) { - if (debug) Rcpp::Rcout << "need RevExtern" << std::endl; - val1 = RevExtern; - } else{ - Rcpp::Rcout << ptgextra[cntr] << std::endl; - } - } else if (ptgextra.size() < (cntr + 1)) { - if (debug) Rprintf("ptgextra %d and %d\n", (int)ptgextra.size(), (int)cntr); - } + // RgbExtra + for (size_t cntr = 0; cntr < ptgextra.size(); ++cntr) { + + val1 = ptgextra[cntr]; - ++cntr; + if (debug) + Rcpp::Rcout << cntr << ": " << (int32_t)val1 << std::endl; switch(val1) { - case PtgExp: + + case PtgExtraCol: { // PtgExtraCol // need_ptg_extra_col = true; @@ -1819,78 +1842,83 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co break; } - // TODO: this does not handle {"foo", "bar"} - case PtgArray: - case PtgArray2: - case PtgArray3: + case PtgExtraArray: { if (debug) Rcpp::Rcout << "PtgExtraArray" << std::endl; // PtgExtraArray - uint32_t rows = 0, cols = 0; - rows = readbin(rows, sas, swapit); - cols = readbin(cols, sas, swapit); - // blob (it is actually called this way) - uint8_t reserved = 0; - reserved = readbin(reserved, sas, swapit); + int32_t rows = 0, cols = 0; + // actually its DRw() and DCol(), but it does not matter? + rows = UncheckedRw(sas, swapit); + cols = UncheckedCol(sas, swapit); std::string array = ""; + std::vector array_elems; // (cols*rows); - if (debug) Rcpp::Rcout << (int32_t)reserved << std::endl; + if (debug) Rcpp::Rcout << rows << ": " << cols << std::endl; - // SerBool - if (reserved == 0x02) { - if (debug) Rcpp::Rcout << "SerBool" << std::endl; - uint8_t f = 0; - f = readbin(f, sas, swapit); - if (debug) Rcpp::Rcout << (int32_t)f << std::endl; + // number of elements in row order: must be equal to rows * cols + for (int32_t row = 0; row < rows; ++row) { + for (int32_t col = 0; col < cols; ++col) { - array = "{" + std::to_string((int32_t)f) + "}"; - // fml_out += "\n"; - } + // blob (it is actually called this way) + uint8_t reserved = 0; + reserved = readbin(reserved, sas, swapit); - // SerErr - if (reserved == 0x04) { - if (debug) Rcpp::Rcout << "SerErr" << std::endl; - uint8_t reserved2 = 0; - uint16_t reserved3 = 0; - std::string strerr = BErr(sas, swapit); - if (debug) Rcpp::Rcout << strerr << std::endl; - reserved2 = readbin(reserved2, sas, swapit); - reserved3 = readbin(reserved3, sas, swapit); - - array = "{" + strerr + "}"; - // fml_out += "\n"; - } + if (debug) Rcpp::Rcout << (int32_t)reserved << std::endl; - // SerNum - if (reserved == 0x00) { - if (debug) Rcpp::Rcout << "SerNum" << std::endl; - double xnum = 0.0; - xnum = Xnum(sas, swapit); + // SerBool + if (reserved == 0x02) { + if (debug) Rcpp::Rcout << "SerBool" << std::endl; + uint8_t f = 0; + f = readbin(f, sas, swapit); - std::stringstream stream; - stream << std::setprecision(16) << xnum; + if (debug) Rcpp::Rcout << (int32_t)f << std::endl; + array_elems.push_back(std::to_string((int32_t)f)); + } - if (debug) Rcpp::Rcout << xnum << std::endl; - array = "{" + stream.str() + "}"; - // fml_out += "\n"; - } + // SerErr + if (reserved == 0x04) { + if (debug) Rcpp::Rcout << "SerErr" << std::endl; + uint8_t reserved2 = 0; + uint16_t reserved3 = 0; + std::string strerr = BErr(sas, swapit); + reserved2 = readbin(reserved2, sas, swapit); + reserved3 = readbin(reserved3, sas, swapit); + + if (debug) Rcpp::Rcout << strerr << std::endl; + array_elems.push_back(strerr); + } - // SerStr - if (reserved == 0x01) { - if (debug) Rcpp::Rcout << "SerStr" << std::endl; - uint16_t cch = 0; - cch = readbin(cch, sas, swapit); - std::string rgch(cch, '\0'); - rgch = read_xlwidestring(rgch, sas); - if (debug) - Rcpp::Rcout << rgch << std::endl; + // SerNum + if (reserved == 0x00) { + if (debug) Rcpp::Rcout << "SerNum" << std::endl; + double xnum = 0.0; + xnum = Xnum(sas, swapit); - array = "{\"" + rgch + "\"}"; - // fml_out += "\n"; + std::stringstream stream; + stream << std::setprecision(16) << xnum; + + if (debug) Rcpp::Rcout << xnum << std::endl; + array_elems.push_back(stream.str()); + } + + // SerStr + if (reserved == 0x01) { + if (debug) Rcpp::Rcout << "SerStr" << std::endl; + uint16_t cch = 0; + cch = readbin(cch, sas, swapit); + std::string rgch(cch, '\0'); + rgch = read_xlwidestring(rgch, sas); + + if (debug) Rcpp::Rcout << rgch << std::endl; + array_elems.push_back(rgch); + } + } } + array += array_elements(array_elems, rows, cols); + size_t fi = fml_out.find("@array@"); if (fi != std::string::npos) { @@ -1907,6 +1935,45 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co break; } + case PtgExtraMem: + { + // not sure what this is good for + if (debug) Rcpp::Rcout << "PtgExtraMem: " << (int32_t)val1 << std::endl; + + int32_t count = 0; + count = readbin(count, sas, swapit); + + for (int32_t cnt = 0; cnt < count; ++cnt) { + std::vector ucrfx = UncheckedRfX(sas, swapit); + } + + break; + } + + case RevNameTabid: + { + // Rcpp::stop("Skip"); + if (debug) Rcpp::Rcout << "RevNameTabid: " << (int32_t)val1 << std::endl; + sas.seekg(pos, sas.beg); + break; + } + + case RevName: + { + // Rcpp::stop("Skip"); + if (debug) Rcpp::Rcout << "RevName: " << (int32_t)val1 << std::endl; + sas.seekg(pos, sas.beg); + break; + } + + case PtgExtraList: + { + // Rcpp::stop("Skip"); + if (debug) Rcpp::Rcout << "PtgExtraList: " << (int32_t)val1 << std::endl; + sas.seekg(pos, sas.beg); + break; + } + // do i need this? case RevExtern: { @@ -1935,7 +2002,7 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co default : { // Rcpp::stop("Skip"); - Rcpp::Rcout << "undefined cb: " << cb << std::endl; + Rcpp::Rcout << "undefined cb: " << (int32_t)val1 << std::endl; sas.seekg(pos, sas.beg); break; } @@ -1951,6 +2018,7 @@ std::string CellParsedFormula(std::istream& sas, bool swapit, bool debug, int co Rcpp::Rcout << "...fml..." << std::endl; Rcpp::Rcout << fml_out << std::endl; } + std::string inflix = parseRPN(fml_out); return inflix;