Skip to content

Commit 65bf651

Browse files
authored
Merge pull request tafia#755 from Mingun/fix-trim-end
Fix all-whitespace text events not being trimmed when `trim_text_start = false` and `trim_text_end = true`
2 parents a44792f + 7558577 commit 65bf651

File tree

6 files changed

+248
-213
lines changed

6 files changed

+248
-213
lines changed

Changelog.md

+4
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@
1414

1515
### Bug Fixes
1616

17+
- [#755]: Fix all-whitespace text events not being trimmed when
18+
`trim_text_start = false` and `trim_text_end = true`.
19+
1720
### Misc Changes
1821

1922
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.
2023

2124
[#650]: https://github.com/tafia/quick-xml/issues/650
25+
[#755]: https://github.com/tafia/quick-xml/pull/755
2226

2327

2428
## 0.32.0 -- 2024-06-10

src/reader/async_tokio.rs

+2-13
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use crate::events::Event;
99
use crate::name::{QName, ResolveResult};
1010
use crate::reader::buffered_reader::impl_buffered_source;
1111
use crate::reader::{
12-
is_whitespace, BangType, ElementParser, NsReader, ParseState, Parser, PiParser, Reader, Span,
12+
is_whitespace, BangType, ElementParser, NsReader, ParseState, Parser, PiParser, ReadTextResult,
13+
Reader, Span,
1314
};
1415

1516
/// A struct for reading XML asynchronously from an [`AsyncBufRead`].
@@ -77,7 +78,6 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
7778
read_event_impl!(
7879
self, buf,
7980
TokioAdapter(&mut self.reader),
80-
read_until_open_async,
8181
read_until_close_async,
8282
await
8383
)
@@ -141,17 +141,6 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
141141
Ok(read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await))
142142
}
143143

144-
/// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event.
145-
///
146-
/// Returns inner `Ok` if the loop should be broken and an event returned.
147-
/// Returns inner `Err` with the same `buf` because Rust borrowck stumbles upon this case in particular.
148-
async fn read_until_open_async<'b>(
149-
&mut self,
150-
buf: &'b mut Vec<u8>,
151-
) -> Result<std::result::Result<Event<'b>, &'b mut Vec<u8>>> {
152-
read_until_open!(self, buf, TokioAdapter(&mut self.reader), read_event_into_async, await)
153-
}
154-
155144
/// Private function to read until `>` is found. This function expects that
156145
/// it was called just after encountering a `<` symbol.
157146
async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {

src/reader/buffered_reader.rs

+89-51
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ use std::path::Path;
88
use crate::errors::{Error, Result};
99
use crate::events::Event;
1010
use crate::name::QName;
11-
use crate::reader::{is_whitespace, BangType, Parser, Reader, Span, XmlSource};
11+
use crate::reader::{is_whitespace, BangType, Parser, ReadTextResult, Reader, Span, XmlSource};
1212

1313
macro_rules! impl_buffered_source {
1414
($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => {
1515
#[cfg(not(feature = "encoding"))]
1616
#[inline]
17-
$($async)? fn remove_utf8_bom(&mut self) -> Result<()> {
17+
$($async)? fn remove_utf8_bom(&mut self) -> io::Result<()> {
1818
use crate::encoding::UTF8_BOM;
1919

2020
loop {
@@ -26,14 +26,14 @@ macro_rules! impl_buffered_source {
2626
Ok(())
2727
},
2828
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
29-
Err(e) => Err(Error::Io(e.into())),
29+
Err(e) => Err(e),
3030
};
3131
}
3232
}
3333

3434
#[cfg(feature = "encoding")]
3535
#[inline]
36-
$($async)? fn detect_encoding(&mut self) -> Result<Option<&'static encoding_rs::Encoding>> {
36+
$($async)? fn detect_encoding(&mut self) -> io::Result<Option<&'static encoding_rs::Encoding>> {
3737
loop {
3838
break match self $(.$reader)? .fill_buf() $(.$await)? {
3939
Ok(n) => if let Some((enc, bom_len)) = crate::encoding::detect_encoding(n) {
@@ -43,54 +43,106 @@ macro_rules! impl_buffered_source {
4343
Ok(None)
4444
},
4545
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
46-
Err(e) => Err(Error::Io(e.into())),
46+
Err(e) => Err(e),
4747
};
4848
}
4949
}
5050

51+
#[inline]
52+
$($async)? fn read_text $(<$lf>)? (
53+
&mut self,
54+
buf: &'b mut Vec<u8>,
55+
position: &mut usize,
56+
) -> ReadTextResult<'b, &'b mut Vec<u8>> {
57+
let mut read = 0;
58+
let start = buf.len();
59+
loop {
60+
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
61+
Ok(n) if n.is_empty() => break,
62+
Ok(n) => n,
63+
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
64+
Err(e) => {
65+
*position += read;
66+
return ReadTextResult::Err(e);
67+
}
68+
};
69+
70+
match memchr::memchr(b'<', available) {
71+
Some(0) => {
72+
self $(.$reader)? .consume(1);
73+
*position += 1;
74+
return ReadTextResult::Markup(buf);
75+
}
76+
Some(i) => {
77+
buf.extend_from_slice(&available[..i]);
78+
79+
let used = i + 1;
80+
self $(.$reader)? .consume(used);
81+
read += used;
82+
83+
*position += read;
84+
return ReadTextResult::UpToMarkup(&buf[start..]);
85+
}
86+
None => {
87+
buf.extend_from_slice(available);
88+
89+
let used = available.len();
90+
self $(.$reader)? .consume(used);
91+
read += used;
92+
}
93+
}
94+
}
95+
96+
*position += read;
97+
ReadTextResult::UpToEof(&buf[start..])
98+
}
99+
51100
#[inline]
52101
$($async)? fn read_bytes_until $(<$lf>)? (
53102
&mut self,
54103
byte: u8,
55104
buf: &'b mut Vec<u8>,
56105
position: &mut usize,
57-
) -> Result<(&'b [u8], bool)> {
106+
) -> io::Result<(&'b [u8], bool)> {
58107
// search byte must be within the ascii range
59108
debug_assert!(byte.is_ascii());
60109

61110
let mut read = 0;
62-
let mut done = false;
63111
let start = buf.len();
64-
while !done {
65-
let used = {
66-
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
67-
Ok(n) if n.is_empty() => break,
68-
Ok(n) => n,
69-
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
70-
Err(e) => {
71-
*position += read;
72-
return Err(Error::Io(e.into()));
73-
}
74-
};
75-
76-
match memchr::memchr(byte, available) {
77-
Some(i) => {
78-
buf.extend_from_slice(&available[..i]);
79-
done = true;
80-
i + 1
81-
}
82-
None => {
83-
buf.extend_from_slice(available);
84-
available.len()
85-
}
112+
loop {
113+
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
114+
Ok(n) if n.is_empty() => break,
115+
Ok(n) => n,
116+
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
117+
Err(e) => {
118+
*position += read;
119+
return Err(e);
86120
}
87121
};
88-
self $(.$reader)? .consume(used);
89-
read += used;
122+
123+
match memchr::memchr(byte, available) {
124+
Some(i) => {
125+
buf.extend_from_slice(&available[..i]);
126+
127+
let used = i + 1;
128+
self $(.$reader)? .consume(used);
129+
read += used;
130+
131+
*position += read;
132+
return Ok((&buf[start..], true));
133+
}
134+
None => {
135+
buf.extend_from_slice(available);
136+
137+
let used = available.len();
138+
self $(.$reader)? .consume(used);
139+
read += used;
140+
}
141+
}
90142
}
91-
*position += read;
92143

93-
Ok((&buf[start..], done))
144+
*position += read;
145+
Ok((&buf[start..], false))
94146
}
95147

96148
#[inline]
@@ -188,7 +240,7 @@ macro_rules! impl_buffered_source {
188240
}
189241

190242
#[inline]
191-
$($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> {
243+
$($async)? fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()> {
192244
loop {
193245
break match self $(.$reader)? .fill_buf() $(.$await)? {
194246
Ok(n) => {
@@ -202,32 +254,18 @@ macro_rules! impl_buffered_source {
202254
}
203255
}
204256
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
205-
Err(e) => Err(Error::Io(e.into())),
257+
Err(e) => Err(e),
206258
};
207259
}
208260
}
209261

210262
#[inline]
211-
$($async)? fn skip_one(&mut self, byte: u8) -> Result<bool> {
212-
// search byte must be within the ascii range
213-
debug_assert!(byte.is_ascii());
214-
215-
match self.peek_one() $(.$await)? ? {
216-
Some(b) if b == byte => {
217-
self $(.$reader)? .consume(1);
218-
Ok(true)
219-
}
220-
_ => Ok(false),
221-
}
222-
}
223-
224-
#[inline]
225-
$($async)? fn peek_one(&mut self) -> Result<Option<u8>> {
263+
$($async)? fn peek_one(&mut self) -> io::Result<Option<u8>> {
226264
loop {
227265
break match self $(.$reader)? .fill_buf() $(.$await)? {
228266
Ok(n) => Ok(n.first().cloned()),
229267
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
230-
Err(e) => Err(Error::Io(e.into())),
268+
Err(e) => Err(e),
231269
};
232270
}
233271
}

0 commit comments

Comments
 (0)