Skip to content

Commit 2b250d6

Browse files
authored
feat: index selector engine support (#132)
- The automaton transition model has been changed to incorporate index-labelled transitions.
- Both engines now support queries with the index selector.

Ref: #132
1 parent 953763a commit 2b250d6

20 files changed

+851
-274
lines changed

.envrc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
use_nix

Justfile

+4
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ test-classifier:
9797
test-engine:
9898
cargo test --test engine_correctness_tests
9999

100+
# Run the query parser tests on default features.
101+
test-parser:
102+
cargo test --test query_parser_tests
103+
100104
# Run all tests, including real dataset tests, on the feature powerset of the project.
101105
test-full:
102106
-cargo install cargo-hack

crates/rsonpath-lib/src/engine.rs

-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ pub mod main;
1111
pub mod recursive;
1212
#[cfg(feature = "tail-skip")]
1313
mod tail_skipping;
14-
1514
pub use main::MainEngine as RsonpathEngine;
1615

1716
use self::error::EngineError;

crates/rsonpath-lib/src/engine/head_skipping.rs

+11-7
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,17 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> {
8585

8686
if fallback_state == initial_state && transitions.len() == 1 {
8787
let (label, target_state) = transitions[0];
88-
debug!("Automaton starts with a descendant search, using memmem heuristic.");
89-
return Some(Self {
90-
bytes,
91-
state: target_state,
92-
is_accepting: automaton.is_accepting(target_state),
93-
label,
94-
});
88+
89+
if let Some(named_label) = label.get_label() {
90+
debug!("Automaton starts with a descendant search, using memmem heuristic.");
91+
92+
return Some(Self {
93+
bytes,
94+
state: target_state,
95+
is_accepting: automaton.is_accepting(target_state),
96+
label: named_label,
97+
});
98+
}
9599
}
96100

97101
None

crates/rsonpath-lib/src/engine/main.rs

+137-38
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@ use super::head_skipping::{CanHeadSkip, HeadSkip};
1212
use super::Compiler;
1313
#[cfg(feature = "head-skip")]
1414
use crate::classification::ResumeClassifierState;
15-
use crate::classification::{
16-
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
17-
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
18-
};
1915
use crate::debug;
2016
use crate::engine::depth::Depth;
2117
use crate::engine::error::EngineError;
@@ -24,9 +20,16 @@ use crate::engine::tail_skipping::TailSkip;
2420
use crate::engine::{Engine, Input};
2521
use crate::query::automaton::{Automaton, State};
2622
use crate::query::error::CompilerError;
27-
use crate::query::{JsonPathQuery, Label};
23+
use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex};
2824
use crate::result::QueryResult;
2925
use crate::BLOCK_SIZE;
26+
use crate::{
27+
classification::{
28+
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
29+
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
30+
},
31+
query::automaton::TransitionLabel,
32+
};
3033
use smallvec::{smallvec, SmallVec};
3134

3235
/// Main engine for a fixed JSONPath query.
@@ -102,6 +105,9 @@ struct Executor<'q, 'b, I: Input> {
102105
bytes: &'b I,
103106
next_event: Option<Structural>,
104107
is_list: bool,
108+
array_count: NonNegativeArrayIndex,
109+
has_any_array_item_transition: bool,
110+
has_any_array_item_transition_to_accepting: bool,
105111
}
106112

107113
fn query_executor<'q, 'b, I: Input>(
@@ -116,6 +122,9 @@ fn query_executor<'q, 'b, I: Input>(
116122
bytes,
117123
next_event: None,
118124
is_list: false,
125+
array_count: NonNegativeArrayIndex::ZERO,
126+
has_any_array_item_transition: false,
127+
has_any_array_item_transition_to_accepting: false,
119128
}
120129
}
121130

@@ -203,10 +212,15 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
203212
let mut any_matched = false;
204213

205214
for &(label, target) in self.automaton[self.state].transitions() {
206-
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
207-
result.report(idx);
208-
any_matched = true;
209-
break;
215+
match label {
216+
TransitionLabel::ArrayIndex(_) => {}
217+
TransitionLabel::ObjectMember(label) => {
218+
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
219+
result.report(idx);
220+
any_matched = true;
221+
break;
222+
}
223+
}
210224
}
211225
}
212226
let fallback_state = self.automaton[self.state].fallback_state();
@@ -240,13 +254,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
240254
R: QueryResult,
241255
{
242256
self.next_event = classifier.next();
257+
243258
let is_next_opening = self.next_event.map_or(false, |s| s.is_opening());
244259

245-
if !is_next_opening {
246-
let fallback_state = self.automaton[self.state].fallback_state();
247-
if self.is_list && self.automaton.is_accepting(fallback_state) {
248-
result.report(idx);
249-
}
260+
let is_fallback_accepting = self
261+
.automaton
262+
.is_accepting(self.automaton[self.state].fallback_state());
263+
264+
if !is_next_opening && self.is_list && is_fallback_accepting {
265+
debug!("Accepting on comma.");
266+
result.report(idx);
267+
}
268+
269+
// After wildcard, check for a matching array index.
270+
// If the array count cannot be incremented any further (`try_increment` fails), give up.
271+
if self.is_list && self.array_count.try_increment().is_err() {
272+
return Ok(());
273+
}
274+
debug!("Incremented array count to {}", self.array_count);
275+
276+
let match_index = self
277+
.automaton
278+
.has_array_index_transition_to_accepting(self.state, &self.array_count);
279+
280+
if !is_next_opening && match_index {
281+
debug!("Accepting on list item.");
282+
result.report(idx);
250283
}
251284

252285
Ok(())
@@ -267,15 +300,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
267300
debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
268301
let mut any_matched = false;
269302

270-
if let Some(colon_idx) = self.find_preceding_colon(idx) {
271-
for &(label, target) in self.automaton[self.state].transitions() {
272-
if self.is_match(colon_idx, label)? {
273-
any_matched = true;
274-
self.transition_to(target, bracket_type);
275-
if self.automaton.is_accepting(target) {
276-
result.report(colon_idx);
303+
let colon_idx = self.find_preceding_colon(idx);
304+
305+
for &(label, target) in self.automaton[self.state].transitions() {
306+
match label {
307+
TransitionLabel::ArrayIndex(i) => {
308+
if self.is_list && i.eq(&self.array_count) {
309+
any_matched = true;
310+
self.transition_to(target, bracket_type);
311+
if self.automaton.is_accepting(target) {
312+
debug!("Accept {idx}");
313+
result.report(idx);
314+
}
315+
break;
316+
}
317+
}
318+
TransitionLabel::ObjectMember(label) => {
319+
if let Some(colon_idx) = colon_idx {
320+
if self.is_match(colon_idx, label)? {
321+
any_matched = true;
322+
self.transition_to(target, bracket_type);
323+
if self.automaton.is_accepting(target) {
324+
result.report(colon_idx);
325+
}
326+
break;
327+
}
277328
}
278-
break;
279329
}
280330
}
281331
}
@@ -301,29 +351,51 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
301351

302352
if bracket_type == BracketType::Square {
303353
self.is_list = true;
354+
self.has_any_array_item_transition =
355+
self.automaton.has_any_array_item_transition(self.state);
356+
self.has_any_array_item_transition_to_accepting = self
357+
.automaton
358+
.has_any_array_item_transition_to_accepting(self.state);
304359

305360
let fallback = self.automaton[self.state].fallback_state();
306-
if self.automaton.is_accepting(fallback) {
361+
let is_fallback_accepting = self.automaton.is_accepting(fallback);
362+
363+
let searching_list = is_fallback_accepting || self.has_any_array_item_transition;
364+
365+
if searching_list {
307366
classifier.turn_commas_on(idx);
308-
self.next_event = classifier.next();
309-
match self.next_event {
310-
Some(Structural::Closing(_, close_idx)) => {
311-
if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1)
312-
{
313-
if next_idx < close_idx {
314-
result.report(next_idx);
367+
self.array_count = NonNegativeArrayIndex::ZERO;
368+
debug!("Initialized array count to {}", self.array_count);
369+
370+
let wants_first_item = is_fallback_accepting
371+
|| self
372+
.automaton
373+
.has_first_array_index_transition_to_accepting(self.state);
374+
375+
if wants_first_item {
376+
self.next_event = classifier.next();
377+
378+
match self.next_event {
379+
Some(Structural::Closing(_, close_idx)) => {
380+
if let Some((next_idx, _)) =
381+
self.bytes.seek_non_whitespace_forward(idx + 1)
382+
{
383+
if next_idx < close_idx {
384+
result.report(next_idx);
385+
}
315386
}
316387
}
388+
Some(Structural::Comma(_)) => {
389+
result.report(idx + 1);
390+
}
391+
_ => (),
317392
}
318-
Some(Structural::Comma(_)) => {
319-
result.report(idx + 1);
320-
}
321-
_ => (),
322393
}
323394
} else {
324395
classifier.turn_commas_off();
325396
}
326397
} else {
398+
classifier.turn_commas_off();
327399
self.is_list = false;
328400
}
329401

@@ -359,6 +431,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
359431
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
360432
self.state = stack_frame.state;
361433
self.is_list = stack_frame.is_list;
434+
self.array_count = stack_frame.array_count;
435+
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
436+
self.has_any_array_item_transition_to_accepting =
437+
stack_frame.has_any_array_item_transition_to_accepting;
438+
439+
debug!("Restored array count to {}", self.array_count);
362440

363441
if self.automaton.is_unitary(self.state) {
364442
let bracket_type = self.current_node_bracket_type();
@@ -369,6 +447,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
369447
}
370448
}
371449
}
450+
372451
#[cfg(not(feature = "unique-labels"))]
373452
{
374453
self.depth
@@ -378,13 +457,20 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
378457
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
379458
self.state = stack_frame.state;
380459
self.is_list = stack_frame.is_list;
460+
self.array_count = stack_frame.array_count;
461+
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
462+
self.has_any_array_item_transition_to_accepting =
463+
stack_frame.has_any_array_item_transition_to_accepting;
464+
465+
debug!("Restored array count to {}", self.array_count);
381466
}
382467
}
383468

384469
if self.is_list
385-
&& self
470+
&& (self
386471
.automaton
387472
.is_accepting(self.automaton[self.state].fallback_state())
473+
|| self.has_any_array_item_transition)
388474
{
389475
classifier.turn_commas_on(idx);
390476
} else {
@@ -402,15 +488,25 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
402488

403489
fn transition_to(&mut self, target: State, opening: BracketType) {
404490
let target_is_list = opening == BracketType::Square;
405-
if target != self.state || target_is_list != self.is_list {
491+
492+
let fallback = self.automaton[self.state].fallback_state();
493+
let is_fallback_accepting = self.automaton.is_accepting(fallback);
494+
let searching_list = is_fallback_accepting || self.has_any_array_item_transition;
495+
496+
if target != self.state || target_is_list != self.is_list || searching_list {
406497
debug!(
407-
"push {}, goto {target}, is_list = {target_is_list}",
408-
self.state
498+
"push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
499+
self.state, self.array_count
409500
);
501+
410502
self.stack.push(StackFrame {
411503
depth: *self.depth,
412504
state: self.state,
413505
is_list: self.is_list,
506+
array_count: self.array_count,
507+
has_any_array_item_transition: self.has_any_array_item_transition,
508+
has_any_array_item_transition_to_accepting: self
509+
.has_any_array_item_transition_to_accepting,
414510
});
415511
self.state = target;
416512
}
@@ -467,6 +563,9 @@ struct StackFrame {
467563
depth: u8,
468564
state: State,
469565
is_list: bool,
566+
array_count: NonNegativeArrayIndex,
567+
has_any_array_item_transition: bool,
568+
has_any_array_item_transition_to_accepting: bool,
470569
}
471570

472571
#[derive(Debug)]

0 commit comments

Comments
 (0)