mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-04-24 18:52:16 +00:00
add support for LALR(1)
This commit is contained in:
parent
0e46b4bbcd
commit
e02aaef522
@ -44,10 +44,10 @@ build = "build.rs" # LALRPOP preprocessing
|
||||
|
||||
# Add a dependency on the LALRPOP runtime library:
|
||||
[dependencies.lalrpop-util]
|
||||
version = "0.1"
|
||||
version = "0.2.0"
|
||||
|
||||
[build-dependencies.lalrpop]
|
||||
version = "0.1"
|
||||
version = "0.2.0"
|
||||
```
|
||||
|
||||
And create a `build.rs` file that looks like:
|
||||
|
@ -1,4 +1,7 @@
|
||||
[package]
|
||||
name = "lalrpop-intern"
|
||||
version = "0.2.0" # LALRPOP
|
||||
description = "Simple string interner used by LALRPOP"
|
||||
repository = "https://github.com/nikomatsakis/lalrpop"
|
||||
license = "Unlicense"
|
||||
authors = ["Niko Matsakis <niko@alum.mit.edu>"]
|
||||
|
32
lalrpop-test/src/expr_lalr.lalrpop
Normal file
32
lalrpop-test/src/expr_lalr.lalrpop
Normal file
@ -0,0 +1,32 @@
|
||||
grammar(scale: i32)["LALR(1)"];
|
||||
|
||||
use util::tok::Tok;
|
||||
|
||||
// Declares the externally supplied token type: every quoted terminal
// used by the rules of this grammar is associated with a `Tok` pattern.
extern token {
|
||||
enum Tok {
|
||||
"(" => Tok::LParen(..),
|
||||
")" => Tok::RParen(..),
|
||||
"-" => Tok::Minus(..),
|
||||
"+" => Tok::Plus(..),
|
||||
"*" => Tok::Times(..),
|
||||
"/" => Tok::Div(..),
|
||||
// the only terminal with a payload: `Term` multiplies it by `scale`
"Num" => Tok::Num(<i32>)
|
||||
}
|
||||
}
|
||||
|
||||
// Public entry point. Additive level: `-` and `+` are written
// left-recursively (the left operand is another `Expr`), with `Factor`
// as the tighter-binding operand on the right.
pub Expr = {
|
||||
<l:Expr> "-" <r:Factor> => l - r;
|
||||
<l:Expr> "+" <r:Factor> => l + r;
|
||||
// no operator at this level: fall through to `Factor`
Factor;
|
||||
};
|
||||
|
||||
// Multiplicative level: `*` and `/` are written left-recursively, with
// `Term` as the right operand.
Factor = {
|
||||
<l:Factor> "*" <r:Term> => l * r;
|
||||
<l:Factor> "/" <r:Term> => l / r;
|
||||
// no operator at this level: fall through to `Term`
Term;
|
||||
};
|
||||
|
||||
// Atoms: a number literal or a parenthesized expression.
Term: i32 = {
|
||||
// every literal is multiplied by the `scale` grammar parameter
<n:"Num"> => n * scale;
|
||||
// only the inner `Expr` is selected with `<...>`
"(" <Expr> ")";
|
||||
};
|
5
lalrpop-test/src/expr_lalr.rs
Normal file
5
lalrpop-test/src/expr_lalr.rs
Normal file
@ -0,0 +1,5 @@
|
||||
#![allow(unused_imports)]
|
||||
#![allow(unused_variables)]
|
||||
use util::tok::Tok;
|
||||
extern crate lalrpop_util as __lalrpop_util;
|
||||
use self::__lalrpop_util::ParseError as __ParseError;
|
@ -7,6 +7,9 @@ mod sub;
|
||||
/// more interesting demonstration of parsing full expressions
|
||||
mod expr;
|
||||
|
||||
/// the same expression tests again, using a grammar declared with the LALR(1) algorithm
|
||||
mod expr_lalr;
|
||||
|
||||
/// test that passes in lifetime/type/formal parameters and threads
|
||||
/// them through, building an AST from the result
|
||||
mod expr_arena;
|
||||
@ -54,6 +57,31 @@ fn expr_test5() {
|
||||
util::test(|v| expr::parse_Expr(11, v), "22 * 3 - 6", 22*11 * 3*11 - 6*11);
|
||||
}
|
||||
|
||||
/// Simple subtraction, parsed by the LALR(1)-generated parser.
#[test]
fn expr_lalr_test1() {
    // Must go through `expr_lalr`, not `expr`: calling `expr::parse_Expr`
    // would only re-run the LR(1) test and leave the LALR(1) parser untested.
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - 3", 22 - 3);
}
|
||||
|
||||
/// Parenthesized sub-expression, parsed by the LALR(1)-generated parser.
#[test]
fn expr_lalr_test2() {
    // Must go through `expr_lalr`, not `expr`: calling `expr::parse_Expr`
    // would only re-run the LR(1) test and leave the LALR(1) parser untested.
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - (3 + 5)", 22 - (3 + 5));
}
|
||||
|
||||
/// Chained subtraction around a parenthesized group, parsed by the
/// LALR(1)-generated parser.
#[test]
fn expr_lalr_test3() {
    // Must go through `expr_lalr`, not `expr`: calling `expr::parse_Expr`
    // would only re-run the LR(1) test and leave the LALR(1) parser untested.
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - (3 - 5) - 13", 22 - (3 - 5) - 13);
}
|
||||
|
||||
/// Multiplication binding tighter than subtraction, parsed by the
/// LALR(1)-generated parser.
#[test]
fn expr_lalr_test4() {
    // Must go through `expr_lalr`, not `expr`: calling `expr::parse_Expr`
    // would only re-run the LR(1) test and leave the LALR(1) parser untested.
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 * 3 - 6", 22 * 3 - 6);
}
|
||||
|
||||
/// Same input as test 4 but with a `scale` of 11, so every literal is
/// multiplied by 11; parsed by the LALR(1)-generated parser.
#[test]
fn expr_lalr_test5() {
    // Must go through `expr_lalr`, not `expr`: calling `expr::parse_Expr`
    // would only re-run the LR(1) test and leave the LALR(1) parser untested.
    util::test(|v| expr_lalr::parse_Expr(11, v), "22 * 3 - 6", 22*11 * 3*11 - 6*11);
}
|
||||
|
||||
#[test]
|
||||
fn sub_test1() {
|
||||
util::test(sub::parse_S, "22 - 3", 22 - 3);
|
||||
|
@ -25,12 +25,57 @@ fn process_dir<P:AsRef<Path>>(root_dir: P) -> io::Result<()> {
|
||||
let lalrpop_files = try!(lalrpop_files(root_dir));
|
||||
for lalrpop_file in lalrpop_files {
|
||||
let rs_file = lalrpop_file.with_extension("rs");
|
||||
let grammar = try!(parse_and_normalize_grammar(lalrpop_file));
|
||||
try!(emit_recursive_ascent(&rs_file, &grammar));
|
||||
if try!(needs_rebuild(&lalrpop_file, &rs_file)) {
|
||||
let grammar = try!(parse_and_normalize_grammar(lalrpop_file));
|
||||
try!(emit_recursive_ascent(&rs_file, &grammar));
|
||||
try!(make_read_only(&rs_file));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Decides whether `rs_file` has to be regenerated from `lalrpop_file`.
///
/// Returns `Ok(true)` when `rs_file` does not exist, and otherwise the
/// verdict of `compare_modification_times` on the two files' metadata.
///
/// # Errors
///
/// Any error from reading the metadata of `rs_file` other than
/// `NotFound`, and any error from reading the metadata of
/// `lalrpop_file`, is returned unchanged.
fn needs_rebuild(lalrpop_file: &Path,
                 rs_file: &Path)
                 -> io::Result<bool>
{
    // The helpers come first so they are not hidden behind the
    // function's result expression.

    /// True when the grammar is at least as recent as the generated file.
    #[cfg(unix)]
    fn compare_modification_times(lalrpop_metadata: &fs::Metadata,
                                  rs_metadata: &fs::Metadata)
                                  -> bool
    {
        use std::os::unix::fs::MetadataExt;
        // `>=` rather than `>`: equal timestamps count as stale
        lalrpop_metadata.mtime() >= rs_metadata.mtime()
    }

    /// No timestamp comparison is done on this platform: always rebuild.
    #[cfg(not(unix))]
    fn compare_modification_times(_lalrpop_metadata: &fs::Metadata,
                                  _rs_metadata: &fs::Metadata)
                                  -> bool
    {
        true
    }

    match fs::metadata(rs_file) {
        Ok(rs_metadata) => {
            // the output exists, so the grammar's metadata decides
            fs::metadata(lalrpop_file).map(|lalrpop_metadata| {
                compare_modification_times(&lalrpop_metadata, &rs_metadata)
            })
        }
        // nothing has been generated yet
        Err(ref e) if e.kind() == io::ErrorKind::NotFound => Ok(true),
        Err(e) => Err(e),
    }
}
|
||||
|
||||
/// Sets the read-only flag on the file at `rs_file`.
///
/// The file's current permissions are read, flagged read-only and
/// written back; an I/O error from either step is returned.
fn make_read_only(rs_file: &Path) -> io::Result<()> {
    fs::metadata(rs_file).and_then(|metadata| {
        let mut permissions = metadata.permissions();
        permissions.set_readonly(true);
        fs::set_permissions(rs_file, permissions)
    })
}
|
||||
|
||||
fn lalrpop_files<P:AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> {
|
||||
let mut result = vec![];
|
||||
for entry in try!(fs::read_dir(root_dir)) {
|
||||
|
@ -18,6 +18,13 @@ pub struct Grammar {
|
||||
pub parameters: Vec<Parameter>,
|
||||
pub where_clauses: Vec<String>,
|
||||
pub items: Vec<GrammarItem>,
|
||||
pub algorithm: Option<Algorithm>,
|
||||
}
|
||||
|
||||
/// Algorithm annotation attached to a parsed grammar: the annotation
/// text plus the span it covers. The text is kept as written here.
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Algorithm {
|
||||
// span of the annotation text
pub span: Span,
|
||||
// the annotation text, interned
pub text: InternedString,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
|
@ -4,7 +4,7 @@
|
||||
* representation incrementally.
|
||||
*/
|
||||
|
||||
use intern::{InternedString};
|
||||
use intern::{self, InternedString};
|
||||
use grammar::pattern::{Pattern};
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use util::{map, Map, Sep};
|
||||
@ -21,6 +21,9 @@ pub struct Grammar {
|
||||
// that they do not conflict with any action strings
|
||||
pub prefix: String,
|
||||
|
||||
// algorithm user requested for this parser
|
||||
pub algorithm: Algorithm,
|
||||
|
||||
// these are the nonterminals that were declared to be public; the
|
||||
// key is the user's name for the symbol, the value is the
|
||||
// artificial symbol we introduce, which will always have a single
|
||||
@ -48,6 +51,12 @@ pub struct Grammar {
|
||||
pub types: Types,
|
||||
}
|
||||
|
||||
/// Parser-construction algorithm that can be requested for a grammar.
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Algorithm {
|
||||
// LR(1)
LR1,
|
||||
// LALR(1)
LALR1,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Parameter {
|
||||
pub name: InternedString,
|
||||
@ -323,3 +332,12 @@ impl Grammar {
|
||||
}
|
||||
}
|
||||
|
||||
impl Algorithm {
|
||||
pub fn from_str(s: InternedString) -> Option<Algorithm> {
|
||||
intern::read(|r| match r.data(s) {
|
||||
"LR" | "LR(1)" => Some(Algorithm::LR1),
|
||||
"LALR" | "LALR(1)" => Some(Algorithm::LALR1),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
156
lalrpop/src/lr1/core/mod.rs
Normal file
156
lalrpop/src/lr1/core/mod.rs
Normal file
@ -0,0 +1,156 @@
|
||||
//! Core LR(1) state construction algorithm.
|
||||
|
||||
use kernel_set;
|
||||
use grammar::repr::*;
|
||||
use lr1::first;
|
||||
use lr1::{Action, Lookahead, Item, Items, State, StateIndex, TableConstructionError};
|
||||
use std::rc::Rc;
|
||||
use util::{map, Multimap, Set};
|
||||
|
||||
#[cfg(test)] mod test;
|
||||
|
||||
pub fn build_lr1_states<'grammar>(grammar: &'grammar Grammar,
|
||||
start: NonterminalString)
|
||||
-> Result<Vec<State<'grammar>>,
|
||||
TableConstructionError<'grammar>>
|
||||
{
|
||||
let lr1 = LR1::new(grammar);
|
||||
lr1.build_states(start)
|
||||
}
|
||||
|
||||
/// Working context for LR(1) state construction over one grammar.
struct LR1<'grammar> {
|
||||
// grammar whose productions are expanded into items
grammar: &'grammar Grammar,
|
||||
// built from `grammar` in `new`; queried for lookaheads during closure
first_sets: first::FirstSets,
|
||||
}
|
||||
|
||||
impl<'grammar> LR1<'grammar> {
|
||||
fn new(grammar: &'grammar Grammar) -> LR1 {
|
||||
LR1 {
|
||||
grammar: grammar,
|
||||
first_sets: first::FirstSets::new(grammar),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_states(&self, start_nt: NonterminalString)
|
||||
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
|
||||
{
|
||||
let mut kernel_set = kernel_set::KernelSet::new();
|
||||
let mut states = vec![];
|
||||
|
||||
// create the starting state
|
||||
kernel_set.add_state(
|
||||
self.transitive_closure(
|
||||
self.items(start_nt, 0, Lookahead::EOF)));
|
||||
|
||||
while let Some(items) = kernel_set.next() {
|
||||
let index = StateIndex(states.len());
|
||||
let mut this_state = State { index: index, items: items.clone(),
|
||||
tokens: map(), gotos: map() };
|
||||
|
||||
// group the items that we can transition into by shifting
|
||||
// over a term or nonterm
|
||||
let transitions: Multimap<Symbol, Item<'grammar>> =
|
||||
items.vec
|
||||
.iter()
|
||||
.filter_map(|item| item.shifted_item())
|
||||
.collect();
|
||||
|
||||
for (symbol, items) in transitions.into_iter() {
|
||||
let items = self.transitive_closure(items);
|
||||
let next_state = kernel_set.add_state(items);
|
||||
|
||||
match symbol {
|
||||
Symbol::Terminal(s) => {
|
||||
let action = Action::Shift(next_state);
|
||||
let prev = this_state.tokens.insert(Lookahead::Terminal(s), action);
|
||||
assert!(prev.is_none()); // cannot have a shift/shift conflict
|
||||
}
|
||||
|
||||
Symbol::Nonterminal(s) => {
|
||||
let prev = this_state.gotos.insert(s, next_state);
|
||||
assert!(prev.is_none());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finally, consider the reductions
|
||||
for item in items.vec.iter().filter(|i| i.can_reduce()) {
|
||||
let action = Action::Reduce(item.production);
|
||||
let prev = this_state.tokens.insert(item.lookahead, action);
|
||||
if let Some(conflict) = prev {
|
||||
return Err(TableConstructionError {
|
||||
items: items.clone(),
|
||||
lookahead: item.lookahead,
|
||||
production: item.production,
|
||||
conflict: conflict,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// extract a new state
|
||||
states.push(this_state);
|
||||
}
|
||||
|
||||
Ok(states)
|
||||
}
|
||||
|
||||
fn items(&self,
|
||||
id: NonterminalString,
|
||||
index: usize,
|
||||
lookahead: Lookahead)
|
||||
-> Vec<Item<'grammar>>
|
||||
{
|
||||
self.grammar.productions_for(id)
|
||||
.iter()
|
||||
.map(|production| {
|
||||
debug_assert!(index <= production.symbols.len());
|
||||
Item { production: production,
|
||||
index: index,
|
||||
lookahead: lookahead }
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// expands `state` with epsilon moves
|
||||
fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
|
||||
-> Items<'grammar>
|
||||
{
|
||||
let mut counter = 0;
|
||||
|
||||
let mut set: Set<Item<'grammar>> =
|
||||
items.iter().cloned().collect();
|
||||
|
||||
while counter < items.len() {
|
||||
let new_items: Vec<_> =
|
||||
items[counter..]
|
||||
.iter()
|
||||
.filter_map(|item| {
|
||||
let shift_symbol = item.shift_symbol();
|
||||
match shift_symbol {
|
||||
None => None, // requires a reduce
|
||||
Some((Symbol::Terminal(_), _)) => None, // requires a shift
|
||||
Some((Symbol::Nonterminal(nt), remainder)) => {
|
||||
Some((nt, remainder, item.lookahead))
|
||||
}
|
||||
}
|
||||
})
|
||||
.flat_map(|(nt, remainder, lookahead)| {
|
||||
let first_set = self.first_sets.first(remainder, lookahead);
|
||||
first_set.into_iter()
|
||||
.flat_map(move |l| self.items(nt, 0, l))
|
||||
})
|
||||
.filter(|&item| set.insert(item))
|
||||
.collect();
|
||||
|
||||
counter = items.len();
|
||||
items.extend(new_items);
|
||||
}
|
||||
|
||||
items.sort();
|
||||
items.dedup();
|
||||
|
||||
Items { vec: Rc::new(items) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,9 +2,10 @@ use intern::intern;
|
||||
use generate;
|
||||
use grammar::repr::*;
|
||||
use test_util::{compare, expect_debug, normalized_grammar};
|
||||
use super::{build_states, State, Items, Lookahead, LR1};
|
||||
use super::Lookahead::EOF;
|
||||
use super::interpret::interpret;
|
||||
use lr1::Lookahead::EOF;
|
||||
use lr1::interpret::interpret;
|
||||
use lr1::core::{LR1, build_lr1_states};
|
||||
use lr1::{State, Items, Lookahead};
|
||||
|
||||
fn nt(t: &str) -> NonterminalString {
|
||||
NonterminalString(intern(t))
|
||||
@ -12,7 +13,7 @@ fn nt(t: &str) -> NonterminalString {
|
||||
|
||||
const ITERATIONS: usize = 22;
|
||||
|
||||
fn random_test(grammar: &Grammar, states: &[State], start_symbol: NonterminalString) {
|
||||
fn random_test<'g>(grammar: &Grammar, states: &'g [State<'g>], start_symbol: NonterminalString) {
|
||||
for i in 0..ITERATIONS {
|
||||
let input_tree = generate::random_parse_tree(grammar, start_symbol);
|
||||
let output_tree = interpret(&states, input_tree.terminals()).unwrap();
|
||||
@ -117,7 +118,7 @@ grammar;
|
||||
|
||||
// for now, just test that process does not result in an error
|
||||
// and yields expected number of states.
|
||||
let states = build_states(&grammar, nt("S")).unwrap();
|
||||
let states = build_lr1_states(&grammar, nt("S")).unwrap();
|
||||
assert_eq!(states.len(), 16);
|
||||
|
||||
// execute it on some sample inputs.
|
||||
@ -185,5 +186,5 @@ fn shift_reduce_conflict1() {
|
||||
};
|
||||
"#);
|
||||
|
||||
assert!(build_states(&grammar, nt("E")).is_err());
|
||||
assert!(build_lr1_states(&grammar, nt("E")).is_err());
|
||||
}
|
@ -7,27 +7,31 @@ use std::iter::IntoIterator;
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use util::Sep;
|
||||
|
||||
pub fn interpret<TOKENS>(states: &[State], tokens: TOKENS) -> Result<ParseTree, ()>
|
||||
pub type InterpretError<'grammar> = (&'grammar State<'grammar>, Lookahead);
|
||||
|
||||
pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
|
||||
-> Result<ParseTree, InterpretError<'grammar>>
|
||||
where TOKENS: IntoIterator<Item=TerminalString>
|
||||
{
|
||||
let mut m = Machine::new(states);
|
||||
m.execute(tokens.into_iter())
|
||||
}
|
||||
|
||||
struct Machine<'states, 'grammar:'states> {
|
||||
states: &'states [State<'grammar>],
|
||||
state_stack: Vec<&'states State<'grammar>>,
|
||||
struct Machine<'grammar> {
|
||||
states: &'grammar [State<'grammar>],
|
||||
state_stack: Vec<&'grammar State<'grammar>>,
|
||||
data_stack: Vec<ParseTree>,
|
||||
}
|
||||
|
||||
impl<'states, 'grammar> Machine<'states, 'grammar> {
|
||||
fn new(states: &'states [State<'grammar>]) -> Machine<'states,'grammar> {
|
||||
impl<'grammar> Machine<'grammar> {
|
||||
fn new(states: &'grammar [State<'grammar>]) -> Machine<'grammar> {
|
||||
Machine { states: states,
|
||||
state_stack: vec![],
|
||||
data_stack: vec![] }
|
||||
}
|
||||
|
||||
fn execute<TOKENS>(&mut self, mut tokens: TOKENS) -> Result<ParseTree, ()>
|
||||
fn execute<TOKENS>(&mut self, mut tokens: TOKENS)
|
||||
-> Result<ParseTree, InterpretError<'grammar>>
|
||||
where TOKENS: Iterator<Item=TerminalString>
|
||||
{
|
||||
assert!(self.state_stack.is_empty());
|
||||
@ -41,7 +45,7 @@ impl<'states, 'grammar> Machine<'states, 'grammar> {
|
||||
|
||||
// check whether we can shift this token
|
||||
match state.tokens.get(&Lookahead::Terminal(terminal)) {
|
||||
None => { return Err(()); }
|
||||
None => { return Err((state, Lookahead::Terminal(terminal))); }
|
||||
|
||||
Some(&Action::Shift(next_index)) => {
|
||||
self.data_stack.push(ParseTree::Terminal(terminal));
|
||||
@ -60,7 +64,7 @@ impl<'states, 'grammar> Machine<'states, 'grammar> {
|
||||
loop {
|
||||
let state = *self.state_stack.last().unwrap();
|
||||
match state.tokens.get(&Lookahead::EOF) {
|
||||
None => { return Err(()); }
|
||||
None => { return Err((state, Lookahead::EOF)); }
|
||||
Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") }
|
||||
Some(&Action::Reduce(production)) => {
|
||||
if !self.reduce(production) {
|
||||
|
151
lalrpop/src/lr1/la0/mod.rs
Normal file
151
lalrpop/src/lr1/la0/mod.rs
Normal file
@ -0,0 +1,151 @@
|
||||
//! Mega naive LALR(1) generation algorithm.
|
||||
|
||||
use lr1::core;
|
||||
use grammar::repr::*;
|
||||
use std::rc::Rc;
|
||||
use util::{map, Map};
|
||||
use itertools::Itertools;
|
||||
use std::collections::hash_map::Entry;
|
||||
use super::{Action, State, StateIndex, Item, Items, Lookahead, TableConstructionError};
|
||||
use super::Action::{Reduce, Shift};
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
/// An item without its lookahead: just a production and a position in it.
/// Vectors of these are the map keys used to decide which LR(1) states
/// are merged.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
struct LR0Item<'grammar> {
|
||||
production: &'grammar Production,
|
||||
// copied from the `index` of the item this was made from
index: usize
|
||||
}
|
||||
|
||||
// Intermediate LALR(1) state. Identical to an LR(1) state, but that
|
||||
// the items can be pushed to. We initially create these with an empty
|
||||
// set of actions, as well.
|
||||
struct LALR1State<'grammar> {
|
||||
// position of this state in the vector of LALR(1) states
index: StateIndex,
|
||||
// items of every LR(1) state merged into this one
items: Vec<Item<'grammar>>,
|
||||
// action per lookahead, with shift targets remapped to LALR(1) indices
tokens: Map<Lookahead, Action<'grammar>>,
|
||||
// goto per nonterminal, with targets remapped to LALR(1) indices
gotos: Map<NonterminalString, StateIndex>,
|
||||
}
|
||||
|
||||
pub fn lalr_states<'grammar>(grammar: &'grammar Grammar,
|
||||
start: NonterminalString)
|
||||
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
|
||||
{
|
||||
// First build the LR(1) states
|
||||
let lr_states = try!(core::build_lr1_states(grammar, start));
|
||||
collapse_to_lalr_states(&lr_states)
|
||||
}
|
||||
|
||||
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[State<'grammar>])
|
||||
-> Result<Vec<State<'grammar>>,
|
||||
TableConstructionError<'grammar>>
|
||||
{
|
||||
// Now compress them. This vector stores, for each state, the
|
||||
// LALR(1) state to which we will remap it.
|
||||
let mut remap: Vec<_> = (0..lr_states.len()).map(|_| StateIndex(0)).collect();
|
||||
let mut lalr1_map: Map<Vec<LR0Item>, StateIndex> = map();
|
||||
let mut lalr1_states: Vec<LALR1State> = vec![];
|
||||
|
||||
for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
|
||||
let lr0_kernel: Vec<_> =
|
||||
lr1_state.items.vec.iter()
|
||||
.map(|item| LR0Item {
|
||||
production: item.production,
|
||||
index: item.index,
|
||||
})
|
||||
.dedup()
|
||||
.collect();
|
||||
|
||||
let lalr1_index =
|
||||
*lalr1_map.entry(lr0_kernel)
|
||||
.or_insert_with(|| {
|
||||
let index = StateIndex(lalr1_states.len());
|
||||
lalr1_states.push(LALR1State {
|
||||
index: index,
|
||||
items: vec![],
|
||||
tokens: map(),
|
||||
gotos: map()
|
||||
});
|
||||
index
|
||||
});
|
||||
|
||||
lalr1_states[lalr1_index.0].items.extend(
|
||||
lr1_state.items.vec.iter().cloned());
|
||||
|
||||
remap[lr1_index] = lalr1_index;
|
||||
}
|
||||
|
||||
// Now that items are fully built, create the actions
|
||||
for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
|
||||
let lalr1_index = remap[lr1_index];
|
||||
let lalr1_state = &mut lalr1_states[lalr1_index.0];
|
||||
|
||||
for (&lookahead, &lr1_action) in &lr1_state.tokens {
|
||||
let lalr1_action = match lr1_action {
|
||||
Action::Shift(state) => Action::Shift(remap[state.0]),
|
||||
Action::Reduce(prod) => Action::Reduce(prod),
|
||||
};
|
||||
|
||||
match lalr1_state.tokens.entry(lookahead) {
|
||||
Entry::Occupied(slot) => {
|
||||
let old_action = *slot.get();
|
||||
if old_action != lalr1_action {
|
||||
return Err(conflict(&lalr1_state.items, lookahead,
|
||||
old_action, lalr1_action));
|
||||
}
|
||||
}
|
||||
Entry::Vacant(slot) => {
|
||||
slot.insert(lalr1_action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (&nt, &lr1_dest) in &lr1_state.gotos {
|
||||
let lalr1_dest = remap[lr1_dest.0];
|
||||
|
||||
match lalr1_state.gotos.entry(nt) {
|
||||
Entry::Occupied(slot) => {
|
||||
let old_dest = *slot.get();
|
||||
assert_eq!(old_dest, lalr1_dest);
|
||||
}
|
||||
Entry::Vacant(slot) => {
|
||||
slot.insert(lalr1_dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, create the new states
|
||||
Ok(
|
||||
lalr1_states.into_iter()
|
||||
.map(|lr| State {
|
||||
index: lr.index,
|
||||
items: Items { vec: Rc::new(lr.items) },
|
||||
tokens: lr.tokens,
|
||||
gotos: lr.gotos
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn conflict<'grammar>(items: &[Item<'grammar>],
|
||||
lookahead: Lookahead,
|
||||
action1: Action<'grammar>,
|
||||
action2: Action<'grammar>)
|
||||
-> TableConstructionError<'grammar> {
|
||||
let (production, conflict) = match (action1, action2) {
|
||||
(c @ Shift(_), Reduce(p)) |
|
||||
(Reduce(p), c @ Shift(_)) |
|
||||
(Reduce(p), c @ Reduce(_)) => { (p, c) }
|
||||
_ => {
|
||||
panic!("conflict between {:?} and {:?}", action1, action2)
|
||||
}
|
||||
};
|
||||
|
||||
TableConstructionError {
|
||||
items: Items { vec: Rc::new(items.to_vec()) },
|
||||
lookahead: lookahead,
|
||||
production: production,
|
||||
conflict: conflict,
|
||||
}
|
||||
}
|
41
lalrpop/src/lr1/la0/test.rs
Normal file
41
lalrpop/src/lr1/la0/test.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use intern::intern;
|
||||
use generate;
|
||||
use grammar::repr::*;
|
||||
use test_util::{compare, expect_debug, normalized_grammar};
|
||||
use super::lalr_states;
|
||||
use super::super::interpret::interpret;
|
||||
|
||||
fn nt(t: &str) -> NonterminalString {
|
||||
NonterminalString(intern(t))
|
||||
}
|
||||
|
||||
// Turns a comma-separated list of expressions into an iterator of
// `TerminalString`s, interning each expression in order.
macro_rules! tokens {
|
||||
($($x:expr),*) => {
|
||||
vec![$(TerminalString(intern($x))),*].into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
// Builds LALR(1) states for a small subtraction grammar with
// parentheses, runs them through the interpreter on a fixed token
// sequence, and compares the debug rendering of the resulting tree.
// NOTE(review): the name looks like a reference to a textbook figure --
// confirm which one.
#[test]
|
||||
fn figure9_23() {
|
||||
let grammar = normalized_grammar(r#"
|
||||
grammar;
|
||||
extern token { enum Tok { } }
|
||||
S: () = E => ();
|
||||
E: () = {
|
||||
E "-" T => ();
|
||||
T => ();
|
||||
};
|
||||
T: () = {
|
||||
"N" => ();
|
||||
"(" E ")" => ();
|
||||
};
|
||||
"#);
|
||||
|
||||
// state construction itself must succeed
let states = lalr_states(&grammar, nt("S")).unwrap();
|
||||
// dumps the states to the test's standard output
println!("{:#?}", states);
|
||||
|
||||
// input: N - ( N - N )
let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap();
|
||||
assert_eq!(
|
||||
&format!("{:?}", tree)[..],
|
||||
r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#);
|
||||
}
|
@ -4,23 +4,19 @@ use kernel_set;
|
||||
use grammar::repr::*;
|
||||
use std::fmt::{Debug, Formatter, Error};
|
||||
use std::rc::Rc;
|
||||
use util::{map, Map, Multimap, Set, Prefix};
|
||||
use util::{Map, Prefix};
|
||||
|
||||
pub mod ascent;
|
||||
|
||||
mod core;
|
||||
mod error;
|
||||
mod first;
|
||||
mod la0;
|
||||
|
||||
#[cfg(test)] mod interpret;
|
||||
#[cfg(test)] mod test;
|
||||
|
||||
pub use self::error::report_error;
|
||||
|
||||
struct LR1<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
first_sets: first::FirstSets,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct State<'grammar> {
|
||||
index: StateIndex,
|
||||
@ -29,7 +25,7 @@ pub struct State<'grammar> {
|
||||
gotos: Map<NonterminalString, StateIndex>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
enum Action<'grammar> {
|
||||
Shift(StateIndex),
|
||||
Reduce(&'grammar Production),
|
||||
@ -75,137 +71,9 @@ pub fn build_states<'grammar>(grammar: &'grammar Grammar,
|
||||
start: NonterminalString)
|
||||
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
|
||||
{
|
||||
let lr1 = LR1::new(grammar);
|
||||
lr1.build_states(start)
|
||||
}
|
||||
|
||||
impl<'grammar> LR1<'grammar> {
|
||||
fn new(grammar: &'grammar Grammar) -> LR1 {
|
||||
LR1 {
|
||||
grammar: grammar,
|
||||
first_sets: first::FirstSets::new(grammar),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_states(&self, start_nt: NonterminalString)
|
||||
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
|
||||
{
|
||||
let mut kernel_set = kernel_set::KernelSet::new();
|
||||
let mut states = vec![];
|
||||
|
||||
// create the starting state
|
||||
kernel_set.add_state(
|
||||
self.transitive_closure(
|
||||
self.items(start_nt, 0, Lookahead::EOF)));
|
||||
|
||||
while let Some(items) = kernel_set.next() {
|
||||
let index = StateIndex(states.len());
|
||||
let mut this_state = State { index: index, items: items.clone(),
|
||||
tokens: map(), gotos: map() };
|
||||
|
||||
// group the items that we can transition into by shifting
|
||||
// over a term or nonterm
|
||||
let transitions: Multimap<Symbol, Item<'grammar>> =
|
||||
items.vec
|
||||
.iter()
|
||||
.filter_map(|item| item.shifted_item())
|
||||
.collect();
|
||||
|
||||
for (symbol, items) in transitions.into_iter() {
|
||||
let items = self.transitive_closure(items);
|
||||
let next_state = kernel_set.add_state(items);
|
||||
|
||||
match symbol {
|
||||
Symbol::Terminal(s) => {
|
||||
let action = Action::Shift(next_state);
|
||||
let prev = this_state.tokens.insert(Lookahead::Terminal(s), action);
|
||||
assert!(prev.is_none()); // cannot have a shift/shift conflict
|
||||
}
|
||||
|
||||
Symbol::Nonterminal(s) => {
|
||||
let prev = this_state.gotos.insert(s, next_state);
|
||||
assert!(prev.is_none());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finally, consider the reductions
|
||||
for item in items.vec.iter().filter(|i| i.can_reduce()) {
|
||||
let action = Action::Reduce(item.production);
|
||||
let prev = this_state.tokens.insert(item.lookahead, action);
|
||||
if let Some(conflict) = prev {
|
||||
return Err(TableConstructionError {
|
||||
items: items.clone(),
|
||||
lookahead: item.lookahead,
|
||||
production: item.production,
|
||||
conflict: conflict,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// extract a new state
|
||||
states.push(this_state);
|
||||
}
|
||||
|
||||
Ok(states)
|
||||
}
|
||||
|
||||
fn items(&self,
|
||||
id: NonterminalString,
|
||||
index: usize,
|
||||
lookahead: Lookahead)
|
||||
-> Vec<Item<'grammar>>
|
||||
{
|
||||
self.grammar.productions_for(id)
|
||||
.iter()
|
||||
.map(|production| {
|
||||
debug_assert!(index <= production.symbols.len());
|
||||
Item { production: production,
|
||||
index: index,
|
||||
lookahead: lookahead }
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// expands `state` with epsilon moves
|
||||
fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
|
||||
-> Items<'grammar>
|
||||
{
|
||||
let mut counter = 0;
|
||||
|
||||
let mut set: Set<Item<'grammar>> =
|
||||
items.iter().cloned().collect();
|
||||
|
||||
while counter < items.len() {
|
||||
let new_items: Vec<_> =
|
||||
items[counter..]
|
||||
.iter()
|
||||
.filter_map(|item| {
|
||||
let shift_symbol = item.shift_symbol();
|
||||
match shift_symbol {
|
||||
None => None, // requires a reduce
|
||||
Some((Symbol::Terminal(_), _)) => None, // requires a shift
|
||||
Some((Symbol::Nonterminal(nt), remainder)) => {
|
||||
Some((nt, remainder, item.lookahead))
|
||||
}
|
||||
}
|
||||
})
|
||||
.flat_map(|(nt, remainder, lookahead)| {
|
||||
let first_set = self.first_sets.first(remainder, lookahead);
|
||||
first_set.into_iter()
|
||||
.flat_map(move |l| self.items(nt, 0, l))
|
||||
})
|
||||
.filter(|&item| set.insert(item))
|
||||
.collect();
|
||||
|
||||
counter = items.len();
|
||||
items.extend(new_items);
|
||||
}
|
||||
|
||||
items.sort();
|
||||
items.dedup();
|
||||
|
||||
Items { vec: Rc::new(items) }
|
||||
match grammar.algorithm {
|
||||
Algorithm::LR1 => core::build_lr1_states(grammar, start),
|
||||
Algorithm::LALR1 => la0::lalr_states(grammar, start),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,12 @@ impl LowerState {
|
||||
.map(|p| r::Parameter { name: p.name, ty: p.ty.type_repr() })
|
||||
.collect();
|
||||
|
||||
let algorithm =
|
||||
match grammar.algorithm {
|
||||
None => r::Algorithm::LR1,
|
||||
Some(ref a) => r::Algorithm::from_str(a.text).unwrap(),
|
||||
};
|
||||
|
||||
Ok(r::Grammar {
|
||||
prefix: self.prefix,
|
||||
start_nonterminals: start_symbols,
|
||||
@ -100,6 +106,7 @@ impl LowerState {
|
||||
type_parameters: grammar.type_parameters,
|
||||
parameters: parameters,
|
||||
where_clauses: grammar.where_clauses,
|
||||
algorithm: algorithm
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@ use super::{NormResult, NormError};
|
||||
use super::norm_util::{self, Symbols};
|
||||
|
||||
use grammar::parse_tree::*;
|
||||
use grammar::repr;
|
||||
use intern::{intern, InternedString};
|
||||
use util::{Map, Multimap, Sep, set};
|
||||
|
||||
@ -55,6 +56,17 @@ struct ScopeChain<'scope> {
|
||||
|
||||
impl<'grammar> Validator<'grammar> {
|
||||
fn validate(&self) -> NormResult<()> {
|
||||
if let Some(ref algorithm) = self.grammar.algorithm {
|
||||
match repr::Algorithm::from_str(algorithm.text) {
|
||||
Some(_) => { }
|
||||
None => {
|
||||
return_err!(
|
||||
algorithm.span,
|
||||
"unrecognized algorithm `{}`", algorithm.text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for item in &self.grammar.items {
|
||||
match *item {
|
||||
GrammarItem::Use(..) => { }
|
||||
|
@ -12,6 +12,7 @@ pub Grammar: Grammar =
|
||||
<tps:GrammarTypeParameters?>
|
||||
<parameters:GrammarParameters?>
|
||||
<where_clauses:"where"?>
|
||||
<algorithm:Algorithm?>
|
||||
";"
|
||||
<items:GrammarItem*> => {
|
||||
let where_clauses =
|
||||
@ -25,7 +26,16 @@ pub Grammar: Grammar =
|
||||
type_parameters: tps.unwrap_or(vec![]),
|
||||
parameters: parameters.unwrap_or(vec![]),
|
||||
where_clauses: where_clauses,
|
||||
items: uses.into_iter().chain(items).collect() }
|
||||
items: uses.into_iter().chain(items).collect(),
|
||||
algorithm: algorithm }
|
||||
};
|
||||
|
||||
// Algorithm annotation: a string literal in square brackets. The span
// covers the string literal only; the brackets lie outside `lo`..`hi`.
Algorithm: Algorithm =
|
||||
"[" <lo:@L> <t:StringLiteral> <hi:@R> "]" => {
|
||||
Algorithm {
|
||||
span: Span(lo, hi),
|
||||
text: t
|
||||
}
|
||||
};
|
||||
|
||||
GrammarTypeParameters: Vec<TypeParameter> =
|
||||
|
Loading…
x
Reference in New Issue
Block a user