Skip to content

Commit 864c4e2

Browse files
committed
u64 offsets
1 parent 2659775 commit 864c4e2

File tree

5 files changed

+74
-70
lines changed

5 files changed

+74
-70
lines changed

src/reader/buffered_reader.rs

+14-14
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ macro_rules! impl_buffered_source {
5353
$($async)? fn read_text $(<$lf>)? (
5454
&mut self,
5555
buf: &'b mut Vec<u8>,
56-
position: &mut usize,
56+
position: &mut u64,
5757
) -> ReadTextResult<'b, &'b mut Vec<u8>> {
5858
let mut read = 0;
5959
let start = buf.len();
@@ -79,7 +79,7 @@ macro_rules! impl_buffered_source {
7979

8080
let used = i + 1;
8181
self $(.$reader)? .consume(used);
82-
read += used;
82+
read += used as u64;
8383

8484
*position += read;
8585
return ReadTextResult::UpToMarkup(&buf[start..]);
@@ -89,7 +89,7 @@ macro_rules! impl_buffered_source {
8989

9090
let used = available.len();
9191
self $(.$reader)? .consume(used);
92-
read += used;
92+
read += used as u64;
9393
}
9494
}
9595
}
@@ -103,7 +103,7 @@ macro_rules! impl_buffered_source {
103103
&mut self,
104104
byte: u8,
105105
buf: &'b mut Vec<u8>,
106-
position: &mut usize,
106+
position: &mut u64,
107107
) -> io::Result<(&'b [u8], bool)> {
108108
// search byte must be within the ascii range
109109
debug_assert!(byte.is_ascii());
@@ -127,7 +127,7 @@ macro_rules! impl_buffered_source {
127127

128128
let used = i + 1;
129129
self $(.$reader)? .consume(used);
130-
read += used;
130+
read += used as u64;
131131

132132
*position += read;
133133
return Ok((&buf[start..], true));
@@ -137,7 +137,7 @@ macro_rules! impl_buffered_source {
137137

138138
let used = available.len();
139139
self $(.$reader)? .consume(used);
140-
read += used;
140+
read += used as u64;
141141
}
142142
}
143143
}
@@ -151,7 +151,7 @@ macro_rules! impl_buffered_source {
151151
&mut self,
152152
mut parser: P,
153153
buf: &'b mut Vec<u8>,
154-
position: &mut usize,
154+
position: &mut u64,
155155
) -> Result<&'b [u8]> {
156156
let mut read = 0;
157157
let start = buf.len();
@@ -171,7 +171,7 @@ macro_rules! impl_buffered_source {
171171

172172
// +1 for `>` which we do not include
173173
self $(.$reader)? .consume(i + 1);
174-
read += i + 1;
174+
read += i as u64 + 1;
175175

176176
*position += read;
177177
return Ok(&buf[start..]);
@@ -182,7 +182,7 @@ macro_rules! impl_buffered_source {
182182

183183
let used = available.len();
184184
self $(.$reader)? .consume(used);
185-
read += used;
185+
read += used as u64;
186186
}
187187

188188
*position += read;
@@ -193,7 +193,7 @@ macro_rules! impl_buffered_source {
193193
$($async)? fn read_bang_element $(<$lf>)? (
194194
&mut self,
195195
buf: &'b mut Vec<u8>,
196-
position: &mut usize,
196+
position: &mut u64,
197197
) -> Result<(BangType, &'b [u8])> {
198198
// Peeked one bang ('!') before being called, so it's guaranteed to
199199
// start with it.
@@ -216,7 +216,7 @@ macro_rules! impl_buffered_source {
216216
buf.extend_from_slice(consumed);
217217

218218
self $(.$reader)? .consume(used);
219-
read += used;
219+
read += used as u64;
220220

221221
*position += read;
222222
return Ok((bang_type, &buf[start..]));
@@ -225,7 +225,7 @@ macro_rules! impl_buffered_source {
225225

226226
let used = available.len();
227227
self $(.$reader)? .consume(used);
228-
read += used;
228+
read += used as u64;
229229
}
230230
}
231231
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
@@ -241,14 +241,14 @@ macro_rules! impl_buffered_source {
241241
}
242242

243243
#[inline]
244-
$($async)? fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()> {
244+
$($async)? fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> {
245245
loop {
246246
break match self $(.$reader)? .fill_buf() $(.$await)? {
247247
Ok(n) => {
248248
let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len());
249249
if count > 0 {
250250
self $(.$reader)? .consume(count);
251-
*position += count;
251+
*position += count as u64;
252252
continue;
253253
} else {
254254
Ok(())

src/reader/mod.rs

+10-9
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ pub use ns_reader::NsReader;
416416
pub use pi::PiParser;
417417

418418
/// Range of input in bytes, that corresponds to some piece of XML
419-
pub type Span = Range<usize>;
419+
pub type Span = Range<u64>;
420420

421421
////////////////////////////////////////////////////////////////////////////////////////////////////
422422

@@ -619,7 +619,8 @@ impl<R> Reader<R> {
619619
/// let mut buf = Vec::new();
620620
///
621621
/// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
622-
/// let end_pos = reader.buffer_position();
622+
/// // We know that the size cannot exceed usize::MAX because we created the parser from a single &[u8]
623+
/// let end_pos = reader.buffer_position() as usize;
623624
/// let mut cursor = reader.into_inner();
624625
/// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
625626
/// .expect("can't make a string");
@@ -667,7 +668,7 @@ impl<R> Reader<R> {
667668
}
668669

669670
/// Gets the current byte position in the input data.
670-
pub const fn buffer_position(&self) -> usize {
671+
pub const fn buffer_position(&self) -> u64 {
671672
// when internal state is InsideMarkup, we have actually read until '<',
672673
// which we don't want to show
673674
if let ParseState::InsideMarkup = self.state.state {
@@ -688,7 +689,7 @@ impl<R> Reader<R> {
688689
/// markup element (i. e. to the `<` character).
689690
///
690691
/// This position is always `<= buffer_position()`.
691-
pub const fn error_position(&self) -> usize {
692+
pub const fn error_position(&self) -> u64 {
692693
self.state.last_error_offset
693694
}
694695

@@ -813,7 +814,7 @@ trait XmlSource<'r, B> {
813814
/// - `position`: Will be increased by amount of bytes consumed
814815
///
815816
/// [events]: crate::events::Event
816-
fn read_text(&mut self, buf: B, position: &mut usize) -> ReadTextResult<'r, B>;
817+
fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
817818

818819
/// Read input until `byte` is found or end of input is reached.
819820
///
@@ -845,7 +846,7 @@ trait XmlSource<'r, B> {
845846
&mut self,
846847
byte: u8,
847848
buf: B,
848-
position: &mut usize,
849+
position: &mut u64,
849850
) -> io::Result<(&'r [u8], bool)>;
850851

851852
/// Read input until processing instruction is finished.
@@ -867,7 +868,7 @@ trait XmlSource<'r, B> {
867868
/// reader which provides bytes fed into the parser.
868869
///
869870
/// [events]: crate::events::Event
870-
fn read_with<P>(&mut self, parser: P, buf: B, position: &mut usize) -> Result<&'r [u8]>
871+
fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8]>
871872
where
872873
P: Parser;
873874

@@ -886,14 +887,14 @@ trait XmlSource<'r, B> {
886887
/// - `position`: Will be increased by amount of bytes consumed
887888
///
888889
/// [events]: crate::events::Event
889-
fn read_bang_element(&mut self, buf: B, position: &mut usize) -> Result<(BangType, &'r [u8])>;
890+
fn read_bang_element(&mut self, buf: B, position: &mut u64) -> Result<(BangType, &'r [u8])>;
890891

891892
/// Consume and discard all the whitespace until the next non-whitespace
892893
/// character or EOF.
893894
///
894895
/// # Parameters
895896
/// - `position`: Will be increased by amount of bytes consumed
896-
fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()>;
897+
fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;
897898

898899
/// Return one character without consuming it, so that future `read_*` calls
899900
/// will still include it. On EOF, return `None`.

src/reader/slice_reader.rs

+18-15
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,10 @@ impl<'a> Reader<&'a [u8]> {
229229
let buffer = self.reader;
230230
let span = self.read_to_end(end)?;
231231

232-
self.decoder().decode(&buffer[0..span.len()])
232+
let len = span.end - span.start;
233+
// SAFETY: Span can only contain indexes up to usize::MAX because they are
234+
// lengths of the same single &[u8] buffer
235+
self.decoder().decode(&buffer[0..len as usize])
233236
}
234237
}
235238

@@ -258,21 +261,21 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
258261
}
259262

260263
#[inline]
261-
fn read_text(&mut self, _buf: (), position: &mut usize) -> ReadTextResult<'a, ()> {
264+
fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> {
262265
match memchr::memchr(b'<', self) {
263266
Some(0) => {
264267
*position += 1;
265268
*self = &self[1..];
266269
ReadTextResult::Markup(())
267270
}
268271
Some(i) => {
269-
*position += i + 1;
272+
*position += i as u64 + 1;
270273
let bytes = &self[..i];
271274
*self = &self[i + 1..];
272275
ReadTextResult::UpToMarkup(bytes)
273276
}
274277
None => {
275-
*position += self.len();
278+
*position += self.len() as u64;
276279
let bytes = &self[..];
277280
*self = &[];
278281
ReadTextResult::UpToEof(bytes)
@@ -285,46 +288,46 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
285288
&mut self,
286289
byte: u8,
287290
_buf: (),
288-
position: &mut usize,
291+
position: &mut u64,
289292
) -> io::Result<(&'a [u8], bool)> {
290293
// search byte must be within the ascii range
291294
debug_assert!(byte.is_ascii());
292295

293296
if let Some(i) = memchr::memchr(byte, self) {
294-
*position += i + 1;
297+
*position += i as u64 + 1;
295298
let bytes = &self[..i];
296299
*self = &self[i + 1..];
297300
Ok((bytes, true))
298301
} else {
299-
*position += self.len();
302+
*position += self.len() as u64;
300303
let bytes = &self[..];
301304
*self = &[];
302305
Ok((bytes, false))
303306
}
304307
}
305308

306309
#[inline]
307-
fn read_with<P>(&mut self, mut parser: P, _buf: (), position: &mut usize) -> Result<&'a [u8]>
310+
fn read_with<P>(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]>
308311
where
309312
P: Parser,
310313
{
311314
if let Some(i) = parser.feed(self) {
312315
// +1 for `>` which we do not include
313-
*position += i + 1;
316+
*position += i as u64 + 1;
314317
let bytes = &self[..i];
315318
*self = &self[i + 1..];
316319
return Ok(bytes);
317320
}
318321

319-
*position += self.len();
322+
*position += self.len() as u64;
320323
Err(Error::Syntax(P::eof_error()))
321324
}
322325

323326
#[inline]
324327
fn read_bang_element(
325328
&mut self,
326329
_buf: (),
327-
position: &mut usize,
330+
position: &mut u64,
328331
) -> Result<(BangType, &'a [u8])> {
329332
// Peeked one bang ('!') before being called, so it's guaranteed to
330333
// start with it.
@@ -333,22 +336,22 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
333336
let bang_type = BangType::new(self[1..].first().copied())?;
334337

335338
if let Some((bytes, i)) = bang_type.parse(&[], self) {
336-
*position += i;
339+
*position += i as u64;
337340
*self = &self[i..];
338341
return Ok((bang_type, bytes));
339342
}
340343

341-
*position += self.len();
344+
*position += self.len() as u64;
342345
Err(bang_type.to_err())
343346
}
344347

345348
#[inline]
346-
fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()> {
349+
fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> {
347350
let whitespaces = self
348351
.iter()
349352
.position(|b| !is_whitespace(*b))
350353
.unwrap_or(self.len());
351-
*position += whitespaces;
354+
*position += whitespaces as u64;
352355
*self = &self[whitespaces..];
353356
Ok(())
354357
}

src/reader/state.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ use crate::utils::{is_whitespace, name_len};
1515
#[derive(Clone, Debug)]
1616
pub(super) struct ReaderState {
1717
/// Number of bytes read from the source of data since the reader was created
18-
pub offset: usize,
18+
pub offset: u64,
1919
/// A snapshot of an `offset` of the last error returned. It can be less than
2020
/// `offset`, because some errors conveniently report at earlier position,
2121
/// and changing `offset` is not possible, because `Error::IllFormed` errors
2222
/// are recoverable.
23-
pub last_error_offset: usize,
23+
pub last_error_offset: u64,
2424
/// Defines how to process next byte
2525
pub state: ParseState,
2626
/// User-defined settings that affect parsing
@@ -104,7 +104,7 @@ impl ReaderState {
104104
// ^ : : - self.offset - len
105105
// ^ : - self.offset - len + 2
106106
// ^ - self.offset - len + 2 + p
107-
self.last_error_offset = self.offset - len + 2 + p;
107+
self.last_error_offset = self.offset - len as u64 + 2 + p as u64;
108108
return Err(Error::IllFormed(IllFormedError::DoubleHyphenInComment));
109109
}
110110
// Continue search after single `-` (+1 to skip it)
@@ -145,7 +145,7 @@ impl ReaderState {
145145
// <!....>
146146
// ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`.
147147
// ^------- We report error at that position, so we need to subtract 2 and buf len
148-
self.last_error_offset = self.offset - len - 2;
148+
self.last_error_offset = self.offset - len as u64 - 2;
149149
Err(bang_type.to_err())
150150
}
151151
}
@@ -182,7 +182,7 @@ impl ReaderState {
182182

183183
// Report error at start of the end tag at `<` character
184184
// -2 for `<` and `>`
185-
self.last_error_offset = self.offset - buf.len() - 2;
185+
self.last_error_offset = self.offset - buf.len() as u64 - 2;
186186
return Err(Error::IllFormed(IllFormedError::MismatchedEndTag {
187187
expected,
188188
found: decoder.decode(name).unwrap_or_default().into_owned(),
@@ -195,7 +195,7 @@ impl ReaderState {
195195
None => {
196196
// Report error at start of the end tag at `<` character
197197
// -2 for `<` and `>`
198-
self.last_error_offset = self.offset - buf.len() - 2;
198+
self.last_error_offset = self.offset - buf.len() as u64 - 2;
199199
return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag(
200200
decoder.decode(name).unwrap_or_default().into_owned(),
201201
)));
@@ -240,7 +240,7 @@ impl ReaderState {
240240
// <?....EOF
241241
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
242242
// so we move offset to it (-2 for `<` and `>`)
243-
self.last_error_offset = self.offset - len - 2;
243+
self.last_error_offset = self.offset - len as u64 - 2;
244244
Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl))
245245
}
246246
}

0 commit comments

Comments
 (0)