Skip to content

Commit 649f3d8

Browse files
authored
Merge pull request tafia#767 from Mingun/move-to-integration
Convert some unit tests to integration tests
2 parents 2659775 + a24ed89 commit 649f3d8

11 files changed

+812
-827
lines changed

src/de/mod.rs

+20-34
Original file line numberDiff line numberDiff line change
@@ -2165,8 +2165,9 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21652165
replace(&mut self.lookahead, self.reader.next())
21662166
}
21672167

2168+
/// Returns `true` when the next event is not a text event in any form (neither text nor CDATA).
21682169
#[inline(always)]
2169-
const fn need_trim_end(&self) -> bool {
2170+
const fn current_event_is_last_text(&self) -> bool {
21702171
// If next event is a text or CDATA, we should not trim trailing spaces
21712172
!matches!(
21722173
self.lookahead,
@@ -2182,43 +2183,27 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21822183
/// [`CData`]: PayloadEvent::CData
21832184
fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
21842185
loop {
2185-
match self.lookahead {
2186-
Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
2187-
let text = self.next_text()?;
2188-
2189-
let mut s = result.into_owned();
2190-
s += &text;
2191-
result = Cow::Owned(s);
2192-
}
2193-
_ => break,
2186+
if self.current_event_is_last_text() {
2187+
break;
21942188
}
2195-
}
2196-
Ok(DeEvent::Text(Text { text: result }))
2197-
}
21982189

2199-
/// Read one text event, panics if current event is not a text event
2200-
///
2201-
/// |Event |XML |Handling
2202-
/// |-----------------------|---------------------------|----------------------------------------
2203-
/// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
2204-
/// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
2205-
/// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
2206-
/// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
2207-
/// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
2208-
#[inline(always)]
2209-
fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
2210-
match self.next_impl()? {
2211-
PayloadEvent::Text(mut e) => {
2212-
if self.need_trim_end() {
2213-
e.inplace_trim_end();
2190+
match self.next_impl()? {
2191+
PayloadEvent::Text(mut e) => {
2192+
if self.current_event_is_last_text() {
2193+
// FIXME: We should trim after decoding the text, but currently we trim before decoding
2194+
e.inplace_trim_end();
2195+
}
2196+
result
2197+
.to_mut()
2198+
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
22142199
}
2215-
Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2216-
}
2217-
PayloadEvent::CData(e) => Ok(e.decode()?),
2200+
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
22182201

2219-
// SAFETY: this method is called only when we peeked Text or CData
2220-
_ => unreachable!("Only `Text` and `CData` events can come here"),
2202+
// SAFETY: the loop breaks above unless the lookahead event is `Text` or `CData`
2203+
_ => unreachable!("Only `Text` and `CData` events can come here"),
2204+
}
22212205
}
2206+
Ok(DeEvent::Text(Text { text: result }))
22222207
}
22232208

22242209
/// Return an input-borrowing event.
@@ -2228,7 +2213,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22282213
PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
22292214
PayloadEvent::End(e) => Ok(DeEvent::End(e)),
22302215
PayloadEvent::Text(mut e) => {
2231-
if self.need_trim_end() && e.inplace_trim_end() {
2216+
if self.current_event_is_last_text() && e.inplace_trim_end() {
2217+
// FIXME: We should trim after decoding the text, but currently we trim before decoding
22322218
continue;
22332219
}
22342220
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)

src/reader/async_tokio.rs

+1-7
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
359359
#[cfg(test)]
360360
mod test {
361361
use super::TokioAdapter;
362-
use crate::reader::test::{check, small_buffers};
362+
use crate::reader::test::check;
363363

364364
check!(
365365
#[tokio::test]
@@ -370,12 +370,6 @@ mod test {
370370
async, await
371371
);
372372

373-
small_buffers!(
374-
#[tokio::test]
375-
read_event_into_async: tokio::io::BufReader<_>,
376-
async, await
377-
);
378-
379373
#[test]
380374
fn test_future_is_send() {
381375
// This test should just compile, no actual runtime checks are performed here.

src/reader/buffered_reader.rs

+1-56
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ impl Reader<BufReader<File>> {
445445

446446
#[cfg(test)]
447447
mod test {
448-
use crate::reader::test::{check, small_buffers};
448+
use crate::reader::test::check;
449449
use crate::reader::XmlSource;
450450

451451
/// Default buffer constructor just passes the byte array from the test
@@ -460,59 +460,4 @@ mod test {
460460
identity,
461461
&mut Vec::new()
462462
);
463-
464-
small_buffers!(
465-
#[test]
466-
read_event_into: std::io::BufReader<_>
467-
);
468-
469-
#[cfg(feature = "encoding")]
470-
mod encoding {
471-
use crate::events::Event;
472-
use crate::reader::Reader;
473-
use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
474-
use pretty_assertions::assert_eq;
475-
476-
/// Checks that encoding is detected by BOM and changed after XML declaration
477-
/// BOM indicates UTF-16LE, but XML - windows-1251
478-
#[test]
479-
fn bom_detected() {
480-
let mut reader =
481-
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
482-
let mut buf = Vec::new();
483-
484-
assert_eq!(reader.decoder().encoding(), UTF_8);
485-
assert!(matches!(
486-
reader.read_event_into(&mut buf).unwrap(),
487-
Event::Decl(_)
488-
));
489-
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);
490-
491-
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
492-
}
493-
494-
/// Checks that encoding is changed by XML declaration, but only once
495-
#[test]
496-
fn xml_declaration() {
497-
let mut reader = Reader::from_reader(
498-
b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>".as_ref(),
499-
);
500-
let mut buf = Vec::new();
501-
502-
assert_eq!(reader.decoder().encoding(), UTF_8);
503-
assert!(matches!(
504-
reader.read_event_into(&mut buf).unwrap(),
505-
Event::Decl(_)
506-
));
507-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
508-
509-
assert!(matches!(
510-
reader.read_event_into(&mut buf).unwrap(),
511-
Event::Decl(_)
512-
));
513-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
514-
515-
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
516-
}
517-
}
518463
}

src/reader/mod.rs

-149
Original file line numberDiff line numberDiff line change
@@ -1826,157 +1826,8 @@ mod test {
18261826
};
18271827
}
18281828

1829-
/// Tests for https://github.com/tafia/quick-xml/issues/469
1830-
macro_rules! small_buffers {
1831-
(
1832-
#[$test:meta]
1833-
$read_event:ident: $BufReader:ty
1834-
$(, $async:ident, $await:ident)?
1835-
) => {
1836-
mod small_buffers {
1837-
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
1838-
use crate::reader::Reader;
1839-
use pretty_assertions::assert_eq;
1840-
1841-
#[$test]
1842-
$($async)? fn decl() {
1843-
let xml = "<?xml ?>";
1844-
// ^^^^^^^ data that fit into buffer
1845-
let size = xml.match_indices("?>").next().unwrap().0 + 1;
1846-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1847-
let mut reader = Reader::from_reader(br);
1848-
let mut buf = Vec::new();
1849-
1850-
assert_eq!(
1851-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1852-
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
1853-
);
1854-
assert_eq!(
1855-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1856-
Event::Eof
1857-
);
1858-
}
1859-
1860-
#[$test]
1861-
$($async)? fn pi() {
1862-
let xml = "<?pi?>";
1863-
// ^^^^^ data that fit into buffer
1864-
let size = xml.match_indices("?>").next().unwrap().0 + 1;
1865-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1866-
let mut reader = Reader::from_reader(br);
1867-
let mut buf = Vec::new();
1868-
1869-
assert_eq!(
1870-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1871-
Event::PI(BytesPI::new("pi"))
1872-
);
1873-
assert_eq!(
1874-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1875-
Event::Eof
1876-
);
1877-
}
1878-
1879-
#[$test]
1880-
$($async)? fn empty() {
1881-
let xml = "<empty/>";
1882-
// ^^^^^^^ data that fit into buffer
1883-
let size = xml.match_indices("/>").next().unwrap().0 + 1;
1884-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1885-
let mut reader = Reader::from_reader(br);
1886-
let mut buf = Vec::new();
1887-
1888-
assert_eq!(
1889-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1890-
Event::Empty(BytesStart::new("empty"))
1891-
);
1892-
assert_eq!(
1893-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1894-
Event::Eof
1895-
);
1896-
}
1897-
1898-
#[$test]
1899-
$($async)? fn cdata1() {
1900-
let xml = "<![CDATA[cdata]]>";
1901-
// ^^^^^^^^^^^^^^^ data that fit into buffer
1902-
let size = xml.match_indices("]]>").next().unwrap().0 + 1;
1903-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1904-
let mut reader = Reader::from_reader(br);
1905-
let mut buf = Vec::new();
1906-
1907-
assert_eq!(
1908-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1909-
Event::CData(BytesCData::new("cdata"))
1910-
);
1911-
assert_eq!(
1912-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1913-
Event::Eof
1914-
);
1915-
}
1916-
1917-
#[$test]
1918-
$($async)? fn cdata2() {
1919-
let xml = "<![CDATA[cdata]]>";
1920-
// ^^^^^^^^^^^^^^^^ data that fit into buffer
1921-
let size = xml.match_indices("]]>").next().unwrap().0 + 2;
1922-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1923-
let mut reader = Reader::from_reader(br);
1924-
let mut buf = Vec::new();
1925-
1926-
assert_eq!(
1927-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1928-
Event::CData(BytesCData::new("cdata"))
1929-
);
1930-
assert_eq!(
1931-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1932-
Event::Eof
1933-
);
1934-
}
1935-
1936-
#[$test]
1937-
$($async)? fn comment1() {
1938-
let xml = "<!--comment-->";
1939-
// ^^^^^^^^^^^^ data that fit into buffer
1940-
let size = xml.match_indices("-->").next().unwrap().0 + 1;
1941-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1942-
let mut reader = Reader::from_reader(br);
1943-
let mut buf = Vec::new();
1944-
1945-
assert_eq!(
1946-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1947-
Event::Comment(BytesText::new("comment"))
1948-
);
1949-
assert_eq!(
1950-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1951-
Event::Eof
1952-
);
1953-
}
1954-
1955-
#[$test]
1956-
$($async)? fn comment2() {
1957-
let xml = "<!--comment-->";
1958-
// ^^^^^^^^^^^^^ data that fit into buffer
1959-
let size = xml.match_indices("-->").next().unwrap().0 + 2;
1960-
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
1961-
let mut reader = Reader::from_reader(br);
1962-
let mut buf = Vec::new();
1963-
1964-
assert_eq!(
1965-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1966-
Event::Comment(BytesText::new("comment"))
1967-
);
1968-
assert_eq!(
1969-
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1970-
Event::Eof
1971-
);
1972-
}
1973-
}
1974-
};
1975-
}
1976-
19771829
// Export macros for the child modules:
19781830
// - buffered_reader
19791831
// - slice_reader
19801832
pub(super) use check;
1981-
pub(super) use small_buffers;
19821833
}

src/reader/slice_reader.rs

-21
Original file line numberDiff line numberDiff line change
@@ -376,25 +376,4 @@ mod test {
376376
identity,
377377
()
378378
);
379-
380-
#[cfg(feature = "encoding")]
381-
mod encoding {
382-
use crate::events::Event;
383-
use crate::reader::Reader;
384-
use encoding_rs::UTF_8;
385-
use pretty_assertions::assert_eq;
386-
387-
/// Checks that XML declaration cannot change the encoding from UTF-8 if
388-
/// a `Reader` was created using `from_str` method
389-
#[test]
390-
fn str_always_has_utf8() {
391-
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");
392-
393-
assert_eq!(reader.decoder().encoding(), UTF_8);
394-
reader.read_event().unwrap();
395-
assert_eq!(reader.decoder().encoding(), UTF_8);
396-
397-
assert_eq!(reader.read_event().unwrap(), Event::Eof);
398-
}
399-
}
400379
}

src/reader/state.rs

+21-2
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,20 @@ impl ReaderState {
7171
BytesText::wrap(content, self.decoder())
7272
}
7373

74-
/// reads `BytesElement` starting with a `!`,
75-
/// return `Comment`, `CData` or `DocType` event
74+
/// Returns `Comment`, `CData` or `DocType` event.
75+
///
76+
/// `buf` contains data between `<` and `>`:
77+
/// - CDATA: `![CDATA[...]]`
78+
/// - Comment: `!--...--`
79+
/// - Doctype (uppercase): `!D...`
80+
/// - Doctype (lowercase): `!d...`
7681
pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
82+
debug_assert_eq!(
83+
buf.first(),
84+
Some(&b'!'),
85+
"CDATA, comment or DOCTYPE should start from '!'"
86+
);
87+
7788
let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
7889
string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
7990
};
@@ -153,7 +164,15 @@ impl ReaderState {
153164

154165
/// Wraps content of `buf` into the [`Event::End`] event. Does the check that
155166
/// end name matches the last opened start name if `self.config.check_end_names` is set.
167+
///
168+
/// `buf` contains data between `<` and `>`, for example `/tag`.
156169
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
170+
debug_assert_eq!(
171+
buf.first(),
172+
Some(&b'/'),
173+
"closing tag should start from '/'"
174+
);
175+
157176
// Strip the `/` character. `content` contains data between `</` and `>`
158177
let content = &buf[1..];
159178
// XML standard permits whitespaces after the markup name in closing tags.

0 commit comments

Comments
 (0)