@@ -20,6 +20,192 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
20
20
return sst;
21
21
}
22
22
23
+ // write xml by streaming to files. this takes whatever input we provide and
24
+ // dumps it into the file. no xml checking, no unicode checking
25
+ void xml_sheet_data_slim (
26
+ Rcpp::DataFrame row_attr,
27
+ Rcpp::DataFrame cc,
28
+ std::string prior,
29
+ std::string post,
30
+ std::string fl
31
+ ) {
32
+
33
+ bool has_cm = cc.containsElementNamed (" c_cm" );
34
+ bool has_ph = cc.containsElementNamed (" c_ph" );
35
+ bool has_vm = cc.containsElementNamed (" c_vm" );
36
+
37
+ std::ofstream file (fl);
38
+
39
+ auto lastrow = 0 ; // integer value of the last row with column data
40
+ auto thisrow = 0 ; // integer value of the current row with column data
41
+ auto row_idx = 0 ; // the index of the row_attr file. this is != rowid
42
+ auto rowid = 0 ; // integer value of the r field in row_attr
43
+
44
+ std::string xml_preserver = " " ;
45
+
46
+ file << " <?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n " ;
47
+ file << prior;
48
+
49
+ Rcpp::CharacterVector cc_c_cm, cc_c_ph, cc_c_vm;
50
+
51
+ if (cc.nrow () && cc.ncol ()) {
52
+ // we cannot access rows directly in the dataframe.
53
+ // Have to extract the columns and use these
54
+ Rcpp::CharacterVector cc_row_r = cc[" row_r" ]; // 1
55
+ Rcpp::CharacterVector cc_r = cc[" r" ]; // A1
56
+ Rcpp::CharacterVector cc_v = cc[" v" ];
57
+ Rcpp::CharacterVector cc_c_t = cc[" c_t" ];
58
+ Rcpp::CharacterVector cc_c_s = cc[" c_s" ];
59
+ if (has_cm) cc_c_cm = cc[" c_cm" ];
60
+ if (has_ph) cc_c_ph = cc[" c_ph" ];
61
+ if (has_vm) cc_c_vm = cc[" c_vm" ];
62
+ Rcpp::CharacterVector cc_f = cc[" f" ];
63
+ Rcpp::CharacterVector cc_f_attr = cc[" f_attr" ];
64
+ Rcpp::CharacterVector cc_is = cc[" is" ];
65
+
66
+ Rcpp::CharacterVector row_r = row_attr[" r" ];
67
+
68
+
69
+ file << " <sheetData>" ;
70
+ for (auto i = 0 ; i < cc.nrow (); ++i) {
71
+
72
+ thisrow = std::stoi (Rcpp::as<std::string>(cc_row_r[i]));
73
+
74
+ if (lastrow < thisrow) {
75
+
76
+ // there might be entirely empty rows in between. this is the case for
77
+ // loadExample. We check the rowid and write the line and skip until we
78
+ // have every row and only then continue writing the column
79
+ while (rowid < thisrow) {
80
+
81
+ rowid = std::stoi (Rcpp::as<std::string>(
82
+ row_r[row_idx]
83
+ ));
84
+
85
+ if (row_idx) file << " </row>" ;
86
+ file << " <row" ;
87
+ Rcpp::CharacterVector attrnams = row_attr.names ();
88
+
89
+ for (auto j = 0 ; j < row_attr.ncol (); ++j) {
90
+
91
+ Rcpp::CharacterVector cv_s = " " ;
92
+ cv_s = Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx];
93
+
94
+ if (cv_s[0 ] != " " ) {
95
+ const std::string val_strl = Rcpp::as<std::string>(cv_s);
96
+ file << " " << attrnams[j] << " =\" " << val_strl.c_str () << " \" " ;
97
+ }
98
+ }
99
+ file << " >" ; // end <r ...>
100
+
101
+ // read the next row_idx when visiting again
102
+ ++row_idx;
103
+ }
104
+ }
105
+
106
+ // create node <c>
107
+ file << " <c" ;
108
+
109
+ // Every cell consists of a typ and a val list. Certain functions have an
110
+ // additional attr list.
111
+
112
+ // append attributes <c r="A1" ...>
113
+ file << " r" << " =\" " << to_string (cc_r[i]).c_str () << " \" " ;
114
+
115
+ if (!to_string (cc_c_s[i]).empty ())
116
+ file << " s" << " =\" " << to_string (cc_c_s[i]).c_str () << " \" " ;
117
+
118
+ // assign type if not <v> aka numeric
119
+ if (!to_string (cc_c_t [i]).empty ())
120
+ file << " t" << " =\" " << to_string (cc_c_t [i]).c_str () << " \" " ;
121
+
122
+ // CellMetaIndex: suppress curly brackets in spreadsheet software
123
+ if (has_cm && !to_string (cc_c_cm[i]).empty ())
124
+ file << " cm" << " =\" " << to_string (cc_c_cm[i]).c_str () << " \" " ;
125
+
126
+ // phonetics spelling
127
+ if (has_ph && !to_string (cc_c_ph[i]).empty ())
128
+ file << " ph" << " =\" " << to_string (cc_c_ph[i]).c_str () << " \" " ;
129
+
130
+ // suppress curly brackets in spreadsheet software
131
+ if (has_vm && !to_string (cc_c_vm[i]).empty ())
132
+ file << " vm" << " =\" " << to_string (cc_c_vm[i]).c_str () << " \" " ;
133
+
134
+ file << " >" ; // end <c ...>
135
+
136
+ bool f_si = false ;
137
+
138
+ // <f> ... </f>
139
+ // f node: formula to be evaluated
140
+ if (!to_string (cc_f[i]).empty () || !to_string (cc_f_attr[i]).empty ()) {
141
+ file << " <f" ;
142
+ if (!to_string (cc_f_attr[i]).empty ()) {
143
+ file << to_string (cc_f_attr[i]).c_str ();
144
+ }
145
+ file << " >" ;
146
+
147
+ file << to_string (cc_f[i]).c_str ();
148
+
149
+ file << " </f>" ;
150
+ }
151
+
152
+ // v node: value stored from evaluated formula
153
+ if (!to_string (cc_v[i]).empty ()) {
154
+ if (!f_si & (to_string (cc_v[i]).compare (xml_preserver.c_str ()) == 0 )) {
155
+ // this looks strange
156
+ file << " <v xml:space=\" preserve\" >" ;
157
+ file << " " ;
158
+ file << " </v>" ;
159
+ } else {
160
+ file << " <v>" << to_string (cc_v[i]).c_str () << " </v>" ;
161
+ }
162
+ }
163
+
164
+ // <is><t> ... </t></is>
165
+ if (to_string (cc_c_t [i]).compare (" inlineStr" ) == 0 ) {
166
+ if (!to_string (cc_is[i]).empty ()) {
167
+ file << to_string (cc_is[i]).c_str ();
168
+ }
169
+ }
170
+
171
+ file << " </c>" ;
172
+
173
+ // update lastrow
174
+ lastrow = thisrow;
175
+ }
176
+
177
+ file << " </row>" ;
178
+ file << " </sheetData>" ;
179
+ } else {
180
+ file << " <sheetData/>" ;
181
+ }
182
+
183
+
184
+ file << post;
185
+ file << " </worksheet>" ;
186
+
187
+ file.close ();
188
+
189
+ }
190
+
191
+ // export worksheet without pugixml
192
+ // this should be way quicker, uses far less memory, but also skips all of the checks pugi does
193
+ //
194
+ // [[Rcpp::export]]
195
+ void write_worksheet_slim (
196
+ Rcpp::Environment sheet_data,
197
+ std::string prior,
198
+ std::string post,
199
+ std::string fl
200
+ ){
201
+ // sheet_data will be in order, just need to check for row_heights
202
+ // CharacterVector cell_col = int_to_col(sheet_data.field("cols"));
203
+ Rcpp::DataFrame row_attr = Rcpp::as<Rcpp::DataFrame>(sheet_data[" row_attr" ]);
204
+ Rcpp::DataFrame cc = Rcpp::as<Rcpp::DataFrame>(sheet_data[" cc" ]);
205
+
206
+ xml_sheet_data_slim (row_attr, cc, prior, post, fl);
207
+ }
208
+
23
209
// creates an xml row
24
210
// data in xml is ordered row wise. therefore we need the row attributes and
25
211
// the column data used in this row. This function uses both to create a single
0 commit comments