Skip to content

Commit 69a1934

Browse files
committed
Add functions for attribute value normalization
closes #371
1 parent 5a536d0 commit 69a1934

File tree

6 files changed

+534
-12
lines changed

6 files changed

+534
-12
lines changed

Changelog.md

+12
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,16 @@
1414

1515
### Bug Fixes
1616

17+
- [#379]: Improved compliance with the XML attribute value normalization process by
18+
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
19+
which ought to be used in place of `Attribute::unescape_value()` and
20+
`Attribute::unescape_value_with()`
21+
1722
### Misc Changes
1823

24+
- [#379]: Added tests for attribute value normalization
25+
26+
[#379]: https://github.com/tafia/quick-xml/pull/379
1927

2028
## 0.29.0 -- 2023-06-13
2129

@@ -111,6 +119,10 @@
111119
- [#565]: Fix compilation error when building with serde <1.0.139
112120

113121

122+
### New Tests
123+
124+
- [#379]: Added tests for attribute value normalization
125+
114126
[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
115127
[#490]: https://github.com/tafia/quick-xml/pull/490
116128
[#510]: https://github.com/tafia/quick-xml/issues/510

benches/macrobenches.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
4343
("players.xml", PLAYERS),
4444
];
4545

46-
// TODO: use fully normalized attribute values
4746
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
4847
let mut r = Reader::from_str(doc);
4948
loop {
5049
match criterion::black_box(r.read_event()?) {
5150
Event::Start(e) | Event::Empty(e) => {
5251
for attr in e.attributes() {
53-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
52+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
5453
}
5554
}
5655
Event::Text(e) => {
@@ -67,15 +66,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
6766
Ok(())
6867
}
6968

70-
// TODO: use fully normalized attribute values
7169
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7270
let mut r = Reader::from_reader(doc);
7371
let mut buf = Vec::new();
7472
loop {
7573
match criterion::black_box(r.read_event_into(&mut buf)?) {
7674
Event::Start(e) | Event::Empty(e) => {
7775
for attr in e.attributes() {
78-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
76+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
7977
}
8078
}
8179
Event::Text(e) => {
@@ -93,15 +91,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
9391
Ok(())
9492
}
9593

96-
// TODO: use fully normalized attribute values
9794
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
9895
let mut r = NsReader::from_str(doc);
9996
loop {
10097
match criterion::black_box(r.read_resolved_event()?) {
10198
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
10299
criterion::black_box(resolved_ns);
103100
for attr in e.attributes() {
104-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
101+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
105102
}
106103
}
107104
(resolved_ns, Event::Text(e)) => {
@@ -120,7 +117,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
120117
Ok(())
121118
}
122119

123-
// TODO: use fully normalized attribute values
124120
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
125121
let mut r = NsReader::from_reader(doc);
126122
let mut buf = Vec::new();
@@ -129,7 +125,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
129125
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
130126
criterion::black_box(resolved_ns);
131127
for attr in e.attributes() {
132-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
128+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
133129
}
134130
}
135131
(resolved_ns, Event::Text(e)) => {

benches/microbenches.rs

+72
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
use std::borrow::Cow;
2+
13
use criterion::{self, criterion_group, criterion_main, Criterion};
24
use pretty_assertions::assert_eq;
35
use quick_xml::escape::{escape, unescape};
6+
use quick_xml::events::attributes::Attribute;
47
use quick_xml::events::Event;
58
use quick_xml::name::QName;
69
use quick_xml::reader::{NsReader, Reader};
@@ -242,6 +245,74 @@ fn attributes(c: &mut Criterion) {
242245
assert_eq!(count, 150);
243246
})
244247
});
248+
249+
group.finish();
250+
}
251+
252+
/// Benchmarks normalizing attribute values
253+
fn attribute_value_normalization(c: &mut Criterion) {
254+
let mut group = c.benchmark_group("attribute_value_normalization");
255+
256+
group.bench_function("noop_short", |b| {
257+
let attr = Attribute {
258+
key: QName(b"foo"),
259+
value: Cow::Borrowed(b"foobar"),
260+
};
261+
b.iter(|| {
262+
criterion::black_box(attr.normalized_value()).unwrap();
263+
})
264+
});
265+
266+
group.bench_function("noop_long", |b| {
267+
let attr = Attribute {
268+
key: QName(b"foo"),
269+
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
270+
};
271+
b.iter(|| {
272+
criterion::black_box(attr.normalized_value()).unwrap();
273+
})
274+
});
275+
276+
group.bench_function("replacement_chars", |b| {
277+
let attr = Attribute {
278+
key: QName(b"foo"),
279+
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
280+
};
281+
b.iter(|| {
282+
criterion::black_box(attr.normalized_value()).unwrap();
283+
})
284+
});
285+
286+
group.bench_function("char_reference", |b| {
287+
let attr1 = Attribute {
288+
key: QName(b"foo"),
289+
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
290+
};
291+
let attr2 = Attribute {
292+
key: QName(b"foo"),
293+
value: Cow::Borrowed(b"&#38;&#60;"),
294+
};
295+
b.iter(|| {
296+
criterion::black_box(attr1.normalized_value()).unwrap();
297+
criterion::black_box(attr2.normalized_value()).unwrap();
298+
})
299+
});
300+
301+
group.bench_function("entity_reference", |b| {
302+
let attr1 = Attribute {
303+
key: QName(b"foo"),
304+
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
305+
};
306+
let attr2 = Attribute {
307+
key: QName(b"foo"),
308+
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
309+
};
310+
b.iter(|| {
311+
criterion::black_box(attr1.normalized_value()).unwrap();
312+
criterion::black_box(attr2.normalized_value()).unwrap();
313+
})
314+
});
315+
245316
group.finish();
246317
}
247318

@@ -354,6 +425,7 @@ criterion_group!(
354425
read_resolved_event_into,
355426
one_event,
356427
attributes,
428+
attribute_value_normalization,
357429
escaping,
358430
unescaping,
359431
);

src/errors.rs

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ impl From<EscapeError> for Error {
8282
}
8383

8484
impl From<AttrError> for Error {
85+
/// Creates a new `Error::InvalidAttr` from the given error
8586
#[inline]
8687
fn from(error: AttrError) -> Self {
8788
Error::InvalidAttr(error)

0 commit comments

Comments
 (0)