Skip to content

Commit d684081

Browse files
committed
Introduce the slow_bruteforce_interpreter.
1 parent ba41b30 commit d684081

11 files changed

+1362
-134
lines changed

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@ readme = "README.md"
1313
description = "Grammar framework."
1414

1515
[dependencies]
16+
cyclotron = "0.0.3"
17+
elsa = "1.3.2"
1618
indexmap = "1"
1719
indexing = "0.3.2"
1820
proc-macro2 = "0.4.30"
19-
elsa = "1.3.2"
2021

2122
[lib]
2223
doctest = false

src/forest.rs

+142-13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use std::collections::{BTreeSet, HashMap, VecDeque};
55
use std::fmt;
66
use std::hash::Hash;
77
use std::io::{self, Write};
8+
use std::iter;
9+
use std::rc::Rc;
810
use std::str;
911

1012
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -51,6 +53,10 @@ pub trait GrammarReflector {
5153

5254
fn node_shape(&self, kind: Self::NodeKind) -> NodeShape<Self::NodeKind>;
5355
fn node_desc(&self, kind: Self::NodeKind) -> String;
56+
57+
/// Index-based lookup of a choice alternative for the node kind `_kind`.
///
/// The default implementation ignores both arguments and always returns
/// `None`; implementors may override it.
// NOTE(review): presumably an override returns the kind of the `_i`-th
// alternative of a `Choice`-shaped `_kind` (that is how
// `DynExpandedTree::get` uses the result) -- confirm against implementors.
fn choice_by_index(&self, _kind: Self::NodeKind, _i: usize) -> Option<Self::NodeKind> {
58+
// Default: no index-based lookup is available.
None
59+
}
5460
}
5561

5662
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -72,25 +78,25 @@ impl<P: fmt::Debug> fmt::Debug for Node<'_, P> {
7278
}
7379

7480
/// A parse forest, in SPPF (Shared Packed Parse Forest) representation.
75-
pub struct ParseForest<'i, G: GrammarReflector, I: Input> {
81+
pub struct ParseForest<'i, P, G, I: Input> {
7682
pub grammar: G,
7783
// HACK(eddyb) `pub(crate)` only for `parser`.
7884
pub(crate) input: Container<'i, I::Container>,
79-
pub(crate) possible_choices: HashMap<Node<'i, G::NodeKind>, BTreeSet<G::NodeKind>>,
80-
pub(crate) possible_splits: HashMap<Node<'i, G::NodeKind>, BTreeSet<usize>>,
85+
pub(crate) possible_choices: HashMap<Node<'i, P>, BTreeSet<P>>,
86+
pub(crate) possible_splits: HashMap<Node<'i, P>, BTreeSet<usize>>,
8187
}
8288

8389
type_lambda! {
84-
pub type<'i> ParseForestL<G: GrammarReflector, I: Input> = ParseForest<'i, G, I>;
90+
pub type<'i> ParseForestL<P, G, I: Input> = ParseForest<'i, P, G, I>;
8591
pub type<'i> NodeL<P> = Node<'i, P>;
8692
}
8793

88-
pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<G, I>, NodeL<P>>>;
94+
pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<P, G, I>, NodeL<P>>>;
8995

9096
#[derive(Debug)]
9197
pub struct MoreThanOne;
9298

93-
impl<'i, P, G, I: Input> ParseForest<'i, G, I>
99+
impl<'i, P, G, I: Input> ParseForest<'i, P, G, I>
94100
where
95101
// FIXME(eddyb) these shouldn't be needed, as they are bounds on
96102
// `GrammarReflector::NodeKind`, but that's ignored currently.
@@ -225,14 +231,17 @@ where
225231
}
226232
}
227233

228-
pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> {
234+
pub fn dump_graphviz(&self, root: Option<Node<'i, P>>, out: &mut dyn Write) -> io::Result<()> {
229235
writeln!(out, "digraph forest {{")?;
230-
let mut queue: VecDeque<_> = self
231-
.possible_choices
232-
.keys()
233-
.chain(self.possible_splits.keys())
234-
.cloned()
235-
.collect();
236+
let mut queue: VecDeque<_> = match root {
237+
Some(root) => iter::once(root).collect(),
238+
None => self
239+
.possible_choices
240+
.keys()
241+
.chain(self.possible_splits.keys())
242+
.cloned()
243+
.collect(),
244+
};
236245
let mut seen: BTreeSet<_> = queue.iter().cloned().collect();
237246
let mut p = 0;
238247
let node_name = |Node { kind, range }| {
@@ -293,6 +302,126 @@ where
293302
}
294303
}
295304

305+
/// Inefficient expansion of a forest, for use when shapes are not statically known.
///
/// Each value pairs a forest node with the (recursively expanded) structure
/// found under it; children are held behind `Rc`.
306+
// TODO(eddyb) cache these `Rc`s, or maybe even use something better?
307+
#[derive(Clone, Debug)]
308+
pub struct DynExpandedTree<'i, P> {
309+
/// The forest node this tree was expanded from.
pub node: Node<'i, P>,
310+
/// What was found under `node` (see `DynExpandedTreeKind`).
pub kind: DynExpandedTreeKind<'i, P>,
311+
}
312+
313+
/// The structure found under a `DynExpandedTree` node, one variant per
/// group of `NodeShape`s handled by `DynExpandedTree::one_from_node`.
#[derive(Clone, Debug)]
314+
pub enum DynExpandedTreeKind<'i, P> {
315+
/// Built for `NodeShape::Opaque` and `NodeShape::Alias(_)`: no children are expanded.
Leaf,
316+
/// Built for `NodeShape::Choice`: the kind of the chosen child and that child's tree.
Or(P, Rc<DynExpandedTree<'i, P>>),
317+
/// Built for `NodeShape::Opt(_)`: the child's tree if `unpack_opt` yielded one, else `None`.
Opt(Option<Rc<DynExpandedTree<'i, P>>>),
318+
/// Built for `NodeShape::Split(..)`: the trees of the left and right halves, in that order.
Concat([Rc<DynExpandedTree<'i, P>>; 2]),
319+
}
320+
321+
impl<'i, P> DynExpandedTree<'i, P>
322+
// FIXME(eddyb) these shouldn't be needed, as they are bounds on
323+
// `GrammarReflector::NodeKind`, but that's ignored currently.
324+
where
325+
P: fmt::Debug + Ord + Hash + Copy,
326+
{
327+
pub fn one_from_node<G, I>(
328+
forest: &ParseForest<'i, P, G, I>,
329+
node: Node<'i, P>,
330+
) -> Result<Rc<Self>, MoreThanOne>
331+
where
332+
G: GrammarReflector<NodeKind = P>,
333+
I: Input,
334+
{
335+
let kind = match forest.grammar.node_shape(node.kind) {
336+
NodeShape::Opaque | NodeShape::Alias(_) => DynExpandedTreeKind::Leaf,
337+
NodeShape::Choice => {
338+
let child = forest.one_choice(node)?;
339+
DynExpandedTreeKind::Or(child.kind, Self::one_from_node(forest, child)?)
340+
}
341+
NodeShape::Opt(_) => DynExpandedTreeKind::Opt(match forest.unpack_opt(node) {
342+
Some(child) => Some(Self::one_from_node(forest, child)?),
343+
None => None,
344+
}),
345+
NodeShape::Split(..) => {
346+
let (left, right) = forest.one_split(node)?;
347+
DynExpandedTreeKind::Concat([
348+
Self::one_from_node(forest, left)?,
349+
Self::one_from_node(forest, right)?,
350+
])
351+
}
352+
};
353+
Ok(Rc::new(DynExpandedTree { node, kind }))
354+
}
355+
356+
pub fn all_from_node<G, I>(
357+
forest: &ParseForest<'i, P, G, I>,
358+
node: Node<'i, P>,
359+
) -> Vec<Rc<Self>>
360+
where
361+
G: GrammarReflector<NodeKind = P>,
362+
I: Input,
363+
{
364+
let new = |kind| Rc::new(DynExpandedTree { node, kind });
365+
match forest.grammar.node_shape(node.kind) {
366+
NodeShape::Opaque | NodeShape::Alias(_) => vec![new(DynExpandedTreeKind::Leaf)],
367+
NodeShape::Choice => forest
368+
.all_choices(node)
369+
.flat_map(|child| {
370+
Self::all_from_node(forest, child)
371+
.into_iter()
372+
.map(move |child_tree| new(DynExpandedTreeKind::Or(child.kind, child_tree)))
373+
})
374+
.collect(),
375+
NodeShape::Opt(_) => match forest.unpack_opt(node) {
376+
Some(child) => Self::all_from_node(forest, child)
377+
.into_iter()
378+
.map(|child_tree| new(DynExpandedTreeKind::Opt(Some(child_tree))))
379+
.collect(),
380+
None => vec![new(DynExpandedTreeKind::Opt(None))],
381+
},
382+
NodeShape::Split(..) => forest
383+
.all_splits(node)
384+
.flat_map(|(left, right)| {
385+
Self::all_from_node(forest, left)
386+
.into_iter()
387+
.flat_map(move |left_tree| {
388+
Self::all_from_node(forest, right)
389+
.into_iter()
390+
.map(move |right_tree| {
391+
new(DynExpandedTreeKind::Concat([
392+
left_tree.clone(),
393+
right_tree,
394+
]))
395+
})
396+
})
397+
})
398+
.collect(),
399+
}
400+
}
401+
402+
pub fn get<G, I>(&self, forest: &ParseForest<'i, P, G, I>, i: usize) -> Option<Rc<Self>>
403+
where
404+
G: GrammarReflector<NodeKind = P>,
405+
I: Input,
406+
{
407+
match &self.kind {
408+
DynExpandedTreeKind::Leaf => unreachable!(),
409+
DynExpandedTreeKind::Or(child, child_tree) => {
410+
if forest.grammar.choice_by_index(self.node.kind, i).unwrap() == *child {
411+
Some(child_tree.clone())
412+
} else {
413+
None
414+
}
415+
}
416+
DynExpandedTreeKind::Opt(child) => {
417+
assert_eq!(i, 0);
418+
child.clone()
419+
}
420+
DynExpandedTreeKind::Concat(children) => Some(children[i].clone()),
421+
}
422+
}
423+
}
424+
296425
// FIXME(rust-lang/rust#54175) work around iterator adapter compile-time
297426
// blowup issues by using a makeshift "non-determinism arrow toolkit".
298427
pub mod nd {

src/lib.rs

+4-98
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@ pub mod forest;
1414
#[forbid(unsafe_code)]
1515
pub mod input;
1616
#[forbid(unsafe_code)]
17+
pub mod lyg;
18+
#[forbid(unsafe_code)]
1719
pub mod parser;
1820
#[forbid(unsafe_code)]
1921
pub mod proc_macro;
2022
#[forbid(unsafe_code)]
2123
pub mod rule;
2224
#[forbid(unsafe_code)]
2325
pub mod scannerless;
26+
#[forbid(unsafe_code)]
27+
pub mod slow_bruteforce_interpreter;
2428

2529
// HACK(eddyb) this contains impls for types in `proc_macro`, which depend on
2630
// `input`, collapse this back into `proc_macro`.
@@ -78,101 +82,3 @@ impl Grammar {
7882
}
7983
}
8084
}
81-
82-
/// Construct a (meta-)grammar for parsing a grammar.
///
/// The rules are written with two local bootstrapping macros (`rule!` and
/// `grammar!`): every rule name is interned in `cx` and every rule body is
/// completed with `.finish(cx)`. The returned grammar is the "main grammar"
/// extended with the "lexical fragment" rules (`FileStart`, `FileEnd`,
/// `Ident`, `StrLit`, `CharLit`).
83-
pub fn grammar_grammar<Pat: Eq + Hash + From<&'static str>>(cx: &Context<Pat>) -> Grammar {
84-
use crate::rule::*;
85-
86-
// HACK(eddyb) more explicit subset of the grammar, for bootstrapping.
// Arms, in order: `{a ..= b}` -> `eat` of an inclusive range; `{! pat}` and
// `{! a ..= b}` -> `negative_lookahead`; a bare identifier -> `call` of the
// rule with that name; `{name : rule}` -> `.field(name)`; `{rule ?}` ->
// `.opt()`; `{elem *}` / `{elem +}` -> `.repeat_many()` / `.repeat_more()`;
// `{elem + % sep}` -> `.repeat_more_sep(sep, SepKind::Simple)`;
// `{r0 | r1 ...}` -> rules combined with `|`; `{r0 r1 ...}` -> rules combined
// with `+`; any other expression -> `eat(expr)`.
87-
macro_rules! rule {
88-
({ $start:tt ..= $end:tt }) => {
89-
eat($start..=$end)
90-
};
91-
({ ! $pat:tt }) => {
92-
negative_lookahead($pat)
93-
};
94-
({ ! $start:tt ..= $end:tt }) => {
95-
negative_lookahead($start..=$end)
96-
};
97-
($rule:ident) => {
98-
call(stringify!($rule))
99-
};
100-
({ $name:ident : $rule:tt }) => {
101-
rule!($rule).field(stringify!($name))
102-
};
103-
({ $rule:tt ? }) => {
104-
rule!($rule).opt()
105-
};
106-
({ $elem:tt * }) => {
107-
rule!($elem).repeat_many()
108-
};
109-
({ $elem:tt + }) => {
110-
rule!($elem).repeat_more()
111-
};
112-
({ $elem:tt + % $sep:tt }) => {
113-
rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple)
114-
};
115-
({ $rule0:tt $(| $rule:tt)+ }) => {
116-
rule!($rule0) $(| rule!($rule))+
117-
};
118-
({ $rule0:tt $($rule:tt)* }) => {
119-
rule!($rule0) $(+ rule!($rule))*
120-
};
121-
($pat:expr) => {
122-
eat($pat)
123-
};
124-
}
125-
// Builds a fresh `Grammar` from `Name = alt | alt ...;` entries: each name is
// interned in `cx`, and its alternatives are wrapped in braces and handed to
// `rule!` before `.finish(cx)`.
126-
macro_rules! grammar {
127-
($($rule_name:ident = $($rule:tt)|+;)*) => ({
128-
let mut grammar = Grammar::new();
129-
$(grammar.define(
130-
cx.intern(stringify!($rule_name)),
131-
rule!({ $($rule)|+ }).finish(cx),
132-
);)*
133-
grammar
134-
})
135-
}
136-
137-
// Main grammar.
138-
let mut grammar = grammar! {
139-
Grammar = { FileStart {rules:{RuleDef*}} FileEnd };
140-
RuleDef = { {name:Ident} "=" {rule:Or} ";" };
141-
Or = {{"|"?} {rules:{Concat+ % "|"}}};
142-
Concat = {rules:{Rule+}};
143-
Rule = { {{ {field:Ident} ":" }?} {rule:Primary} {{modifier:Modifier}?} };
144-
Primary =
145-
{Eat:Pattern} |
146-
{Call:Ident} |
147-
{Group:{ "{" {{or:Or}?} "}" }};
148-
Modifier =
149-
{Opt:"?"} |
150-
{Repeat:{ {repeat:Repeat} {{ {kind:SepKind} {sep:Primary} }?} }};
151-
Repeat =
152-
{Many:"*"} |
153-
{More:"+"};
154-
SepKind =
155-
{Simple:"%"} |
156-
// HACK(eddyb) should be "%%", but `rustc`'s `proc_macro` server doesn't
157-
// always preserve jointness, except within multi-character Rust operators.
158-
{Trailing:{"%" "%"}};
159-
Pattern =
160-
{Str:StrLit} |
161-
{CharRange:{ {{start:CharLit}?} ".." {{end:CharLit}?} }} |
162-
{CharRangeInclusive:{ {{start:CharLit}?} "..=" {end:CharLit} }};
163-
};
164-
165-
// Lexical fragment of the grammar.
166-
grammar.extend(grammar! {
167-
FileStart = "";
168-
FileEnd = "";
169-
170-
Ident = IDENT;
171-
172-
// FIXME(eddyb) restrict literals, once `proc_macro` allows it.
173-
StrLit = LITERAL;
174-
CharLit = LITERAL;
175-
});
176-
177-
grammar
178-
}

0 commit comments

Comments
 (0)