@@ -53,6 +53,13 @@ class ReadStreamCSV : public ReadStream {
53
53
int size = static_cast <int >(inputMap.size ());
54
54
inputMap[size] = size;
55
55
}
56
+
57
+ rfc4180 = (getOr (rwOperation, " rfc4180" , " false" ) == " true" );
58
+ if (rfc4180 && delimiter.find (' "' ) != std::string::npos) {
59
+ std::stringstream errorMessage;
60
+ errorMessage << " CSV delimiter cannot contain '\" ' character when rfc4180 is enabled." ;
61
+ throw std::invalid_argument (errorMessage.str ());
62
+ }
56
63
}
57
64
58
65
protected:
@@ -79,11 +86,10 @@ class ReadStreamCSV : public ReadStream {
79
86
++lineNumber;
80
87
81
88
std::size_t start = 0 ;
82
- std::size_t end = 0 ;
83
89
std::size_t columnsFilled = 0 ;
84
90
for (uint32_t column = 0 ; columnsFilled < arity; column++) {
85
91
std::size_t charactersRead = 0 ;
86
- std::string element = nextElement (line, start, end );
92
+ std::string element = nextElement (line, start);
87
93
if (inputMap.count (column) == 0 ) {
88
94
continue ;
89
95
}
@@ -156,9 +162,60 @@ class ReadStreamCSV : public ReadStream {
156
162
return value;
157
163
}
158
164
159
- std::string nextElement (const std::string& line, std::size_t & start, std:: size_t & end ) {
165
+ std::string nextElement (const std::string& line, std::size_t & start) {
160
166
std::string element;
161
167
168
+ if (rfc4180) {
169
+ if (line[start] == ' "' ) {
170
+ // quoted field
171
+ const std::size_t end = line.length ();
172
+ std::size_t pos = start + 1 ;
173
+ bool foundEndQuote = false ;
174
+ while (pos < end) {
175
+ char c = line[pos++];
176
+ if (c == ' "' && (pos < end) && line[pos] == ' "' ) {
177
+ // two consecutive double quotes inside a quoted field => one literal double quote
178
+ element.push_back (' "' );
179
+ ++pos;
180
+ } else if (c == ' "' ) {
181
+ foundEndQuote = true ;
182
+ break ;
183
+ } else {
184
+ element.push_back (c);
185
+ }
186
+ }
187
+
188
+ if (!foundEndQuote) {
189
+ // missing closing quote
190
+ std::stringstream errorMessage;
191
+ errorMessage << " Unbalanced field quote in line " << lineNumber << " ; " ;
192
+ throw std::invalid_argument (errorMessage.str ());
193
+ }
194
+
195
+ // field must be immediately followed by delimiter or end of line
196
+ if (pos != line.length ()) {
197
+ std::size_t nextDelimiter = line.find (delimiter, pos);
198
+ if (nextDelimiter != pos) {
199
+ std::stringstream errorMessage;
200
+ errorMessage << " Separator expected immediately after quoted field in line "
201
+ << lineNumber << " ; " ;
202
+ throw std::invalid_argument (errorMessage.str ());
203
+ }
204
+ }
205
+
206
+ start = pos + delimiter.size ();
207
+ return element;
208
+ } else {
209
+ // non-quoted field, span until next delimiter or end of line
210
+ const std::size_t end = std::min (line.find (delimiter, start), line.length ());
211
+ element = line.substr (start, end - start);
212
+ start = end + delimiter.size ();
213
+
214
+ return element;
215
+ }
216
+ }
217
+
218
+ std::size_t end = start;
162
219
// Handle record/tuple delimiter coincidence.
163
220
if (delimiter.find (' ,' ) != std::string::npos) {
164
221
int record_parens = 0 ;
@@ -190,7 +247,7 @@ class ReadStreamCSV : public ReadStream {
190
247
// Handle the end-of-the-line case where parentheses are unbalanced.
191
248
if (record_parens != 0 ) {
192
249
std::stringstream errorMessage;
193
- errorMessage << " Unbalanced record parenthesis " << lineNumber << " ; " ;
250
+ errorMessage << " Unbalanced record parenthesis in line " << lineNumber << " ; " ;
194
251
throw std::invalid_argument (errorMessage.str ());
195
252
}
196
253
} else {
@@ -238,6 +295,7 @@ class ReadStreamCSV : public ReadStream {
238
295
std::istream& file;
239
296
std::size_t lineNumber;
240
297
std::map<int , int > inputMap;
298
+ bool rfc4180;
241
299
};
242
300
243
301
class ReadFileCSV : public ReadStreamCSV {
0 commit comments