add support for LALR(1)

This commit is contained in:
Niko Matsakis 2015-08-01 23:30:13 -07:00
parent 0e46b4bbcd
commit e02aaef522
17 changed files with 548 additions and 160 deletions

View File

@ -44,10 +44,10 @@ build = "build.rs" # LALRPOP preprocessing
# Add a dependency on the LALRPOP runtime library: # Add a dependency on the LALRPOP runtime library:
[dependencies.lalrpop-util] [dependencies.lalrpop-util]
version = "0.1" version = "0.2.0"
[build-dependencies.lalrpop] [build-dependencies.lalrpop]
version = "0.1" version = "0.2.0"
``` ```
And create a `build.rs` file that looks like: And create a `build.rs` file that looks like:

View File

@ -1,4 +1,7 @@
[package] [package]
name = "lalrpop-intern" name = "lalrpop-intern"
version = "0.2.0" # LALRPOP version = "0.2.0" # LALRPOP
description = "Simple string interner used by LALRPOP"
repository = "https://github.com/nikomatsakis/lalrpop"
license = "Unlicense"
authors = ["Niko Matsakis <niko@alum.mit.edu>"] authors = ["Niko Matsakis <niko@alum.mit.edu>"]

View File

@ -0,0 +1,32 @@
// Grammar used to exercise the LALR(1) construction: the same expression
// language as the `expr` test grammar, but compiled with the "LALR(1)"
// algorithm annotation. The `scale` grammar parameter multiplies every
// numeric literal.
grammar(scale: i32)["LALR(1)"];

use util::tok::Tok;

// Map the quoted terminal names used below onto the external token enum.
extern token {
    enum Tok {
        "(" => Tok::LParen(..),
        ")" => Tok::RParen(..),
        "-" => Tok::Minus(..),
        "+" => Tok::Plus(..),
        "*" => Tok::Times(..),
        "/" => Tok::Div(..),
        "Num" => Tok::Num(<i32>)
    }
}

// Lowest precedence: additive operators, left-recursive (left-associative).
pub Expr = {
    <l:Expr> "-" <r:Factor> => l - r;
    <l:Expr> "+" <r:Factor> => l + r;
    Factor;
};

// Higher precedence: multiplicative operators, left-associative.
Factor = {
    <l:Factor> "*" <r:Term> => l * r;
    <l:Factor> "/" <r:Term> => l / r;
    Term;
};

// Atoms: scaled numeric literals and parenthesized expressions.
Term: i32 = {
    <n:"Num"> => n * scale;
    "(" <Expr> ")";
};

View File

@ -0,0 +1,5 @@
#![allow(unused_imports)]
#![allow(unused_variables)]
use util::tok::Tok;
extern crate lalrpop_util as __lalrpop_util;
use self::__lalrpop_util::ParseError as __ParseError;

View File

@ -7,6 +7,9 @@ mod sub;
/// more interesting demonstration of parsing full expressions /// more interesting demonstration of parsing full expressions
mod expr; mod expr;
/// more interesting demonstration of parsing full expressions
mod expr_lalr;
/// test that passes in lifetime/type/formal parameters and threads /// test that passes in lifetime/type/formal parameters and threads
/// them through, building an AST from the result /// them through, building an AST from the result
mod expr_arena; mod expr_arena;
@ -54,6 +57,31 @@ fn expr_test5() {
util::test(|v| expr::parse_Expr(11, v), "22 * 3 - 6", 22*11 * 3*11 - 6*11); util::test(|v| expr::parse_Expr(11, v), "22 * 3 - 6", 22*11 * 3*11 - 6*11);
} }
// NOTE(review): these tests mirror the `expr` tests above but must exercise
// the LALR(1)-generated parser. As written they called `expr::parse_Expr`,
// so the `expr_lalr` module was never actually tested; fixed to call
// `expr_lalr::parse_Expr`.

#[test]
fn expr_lalr_test1() {
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - 3", 22 - 3);
}

#[test]
fn expr_lalr_test2() {
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - (3 + 5)", 22 - (3 + 5));
}

#[test]
fn expr_lalr_test3() {
    // nested parens plus trailing subtraction checks left associativity
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 - (3 - 5) - 13", 22 - (3 - 5) - 13);
}

#[test]
fn expr_lalr_test4() {
    // precedence: `*` binds tighter than `-`
    util::test(|v| expr_lalr::parse_Expr(1, v), "22 * 3 - 6", 22 * 3 - 6);
}

#[test]
fn expr_lalr_test5() {
    // non-unit `scale` argument: every literal is multiplied by 11
    util::test(|v| expr_lalr::parse_Expr(11, v), "22 * 3 - 6", 22*11 * 3*11 - 6*11);
}
#[test] #[test]
fn sub_test1() { fn sub_test1() {
util::test(sub::parse_S, "22 - 3", 22 - 3); util::test(sub::parse_S, "22 - 3", 22 - 3);

View File

@ -25,12 +25,57 @@ fn process_dir<P:AsRef<Path>>(root_dir: P) -> io::Result<()> {
let lalrpop_files = try!(lalrpop_files(root_dir)); let lalrpop_files = try!(lalrpop_files(root_dir));
for lalrpop_file in lalrpop_files { for lalrpop_file in lalrpop_files {
let rs_file = lalrpop_file.with_extension("rs"); let rs_file = lalrpop_file.with_extension("rs");
if try!(needs_rebuild(&lalrpop_file, &rs_file)) {
let grammar = try!(parse_and_normalize_grammar(lalrpop_file)); let grammar = try!(parse_and_normalize_grammar(lalrpop_file));
try!(emit_recursive_ascent(&rs_file, &grammar)); try!(emit_recursive_ascent(&rs_file, &grammar));
try!(make_read_only(&rs_file));
}
} }
Ok(()) Ok(())
} }
/// Determines whether the generated `rs_file` must be rebuilt from
/// `lalrpop_file`.
///
/// Returns `Ok(true)` when the `.rs` file does not exist yet, or when the
/// grammar file's modification time is at least as recent as the `.rs`
/// file's; any other I/O error is propagated.
fn needs_rebuild(lalrpop_file: &Path,
                 rs_file: &Path)
                 -> io::Result<bool>
{
    return match fs::metadata(&rs_file) {
        Ok(rs_metadata) => {
            let lalrpop_metadata = try!(fs::metadata(&lalrpop_file));
            Ok(compare_modification_times(&lalrpop_metadata, &rs_metadata))
        }
        Err(e) => {
            match e.kind() {
                // no generated file yet: definitely rebuild
                io::ErrorKind::NotFound => Ok(true),
                _ => Err(e),
            }
        }
    };

    // `>=` (not `>`) so equal timestamps still trigger a rebuild; `mtime()`
    // is only available via the unix metadata extension trait.
    #[cfg(unix)]
    fn compare_modification_times(lalrpop_metadata: &fs::Metadata,
                                  rs_metadata: &fs::Metadata)
                                  -> bool
    {
        use std::os::unix::fs::MetadataExt;
        lalrpop_metadata.mtime() >= rs_metadata.mtime()
    }

    // On non-unix platforms we cannot read mtimes here, so be conservative
    // and always rebuild.
    #[cfg(not(unix))]
    fn compare_modification_times(lalrpop_metadata: &fs::Metadata,
                                  rs_metadata: &fs::Metadata)
                                  -> bool
    {
        true
    }
}
/// Marks the generated `.rs` file read-only, discouraging hand edits.
fn make_read_only(rs_file: &Path) -> io::Result<()> {
    let mut permissions = try!(fs::metadata(&rs_file)).permissions();
    permissions.set_readonly(true);
    fs::set_permissions(&rs_file, permissions)
}
fn lalrpop_files<P:AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> { fn lalrpop_files<P:AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> {
let mut result = vec![]; let mut result = vec![];
for entry in try!(fs::read_dir(root_dir)) { for entry in try!(fs::read_dir(root_dir)) {

View File

@ -18,6 +18,13 @@ pub struct Grammar {
pub parameters: Vec<Parameter>, pub parameters: Vec<Parameter>,
pub where_clauses: Vec<String>, pub where_clauses: Vec<String>,
pub items: Vec<GrammarItem>, pub items: Vec<GrammarItem>,
pub algorithm: Option<Algorithm>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Algorithm {
pub span: Span,
pub text: InternedString,
} }
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]

View File

@ -4,7 +4,7 @@
* representation incrementally. * representation incrementally.
*/ */
use intern::{InternedString}; use intern::{self, InternedString};
use grammar::pattern::{Pattern}; use grammar::pattern::{Pattern};
use std::fmt::{Debug, Display, Formatter, Error}; use std::fmt::{Debug, Display, Formatter, Error};
use util::{map, Map, Sep}; use util::{map, Map, Sep};
@ -21,6 +21,9 @@ pub struct Grammar {
// that they do not conflict with any action strings // that they do not conflict with any action strings
pub prefix: String, pub prefix: String,
// algorithm user requested for this parser
pub algorithm: Algorithm,
// these are the nonterminals that were declared to be public; the // these are the nonterminals that were declared to be public; the
// key is the user's name for the symbol, the value is the // key is the user's name for the symbol, the value is the
// artificial symbol we introduce, which will always have a single // artificial symbol we introduce, which will always have a single
@ -48,6 +51,12 @@ pub struct Grammar {
pub types: Types, pub types: Types,
} }
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Algorithm {
LR1,
LALR1,
}
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Parameter { pub struct Parameter {
pub name: InternedString, pub name: InternedString,
@ -323,3 +332,12 @@ impl Grammar {
} }
} }
impl Algorithm {
    /// Parses the user-supplied algorithm annotation from the grammar
    /// header (e.g. `grammar["LALR(1)"];`). Accepts the short and long
    /// spellings of each name; returns `None` for anything unrecognized
    /// so the caller can report a validation error.
    pub fn from_str(s: InternedString) -> Option<Algorithm> {
        intern::read(|r| match r.data(s) {
            "LR" | "LR(1)" => Some(Algorithm::LR1),
            "LALR" | "LALR(1)" => Some(Algorithm::LALR1),
            _ => None,
        })
    }
}

156
lalrpop/src/lr1/core/mod.rs Normal file
View File

@ -0,0 +1,156 @@
//! Core LR(1) state construction algorithm.
use kernel_set;
use grammar::repr::*;
use lr1::first;
use lr1::{Action, Lookahead, Item, Items, State, StateIndex, TableConstructionError};
use std::rc::Rc;
use util::{map, Multimap, Set};
#[cfg(test)] mod test;
/// Constructs the canonical LR(1) state machine for `grammar`, starting
/// from the nonterminal `start`.
///
/// Returns a `TableConstructionError` describing the first shift/reduce
/// or reduce/reduce conflict encountered, if any.
pub fn build_lr1_states<'grammar>(grammar: &'grammar Grammar,
                                  start: NonterminalString)
                                  -> Result<Vec<State<'grammar>>,
                                            TableConstructionError<'grammar>>
{
    LR1::new(grammar).build_states(start)
}
/// Context shared by the LR(1) construction routines.
struct LR1<'grammar> {
    grammar: &'grammar Grammar,
    // FIRST sets precomputed once up front; consulted when closing items
    // over a nonterminal to determine the lookaheads of the new items
    first_sets: first::FirstSets,
}
impl<'grammar> LR1<'grammar> {
    fn new(grammar: &'grammar Grammar) -> LR1 {
        LR1 {
            grammar: grammar,
            first_sets: first::FirstSets::new(grammar),
        }
    }

    /// Worklist-driven construction of the canonical LR(1) state machine.
    /// Seeds the kernel set with the closure of the start items, then pops
    /// item sets one at a time, computing each state's shift/goto
    /// transitions and reductions. Errors on the first shift/reduce or
    /// reduce/reduce conflict discovered.
    fn build_states(&self, start_nt: NonterminalString)
                    -> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
    {
        let mut kernel_set = kernel_set::KernelSet::new();
        let mut states = vec![];

        // create the starting state
        kernel_set.add_state(
            self.transitive_closure(
                self.items(start_nt, 0, Lookahead::EOF)));

        while let Some(items) = kernel_set.next() {
            let index = StateIndex(states.len());
            let mut this_state = State { index: index, items: items.clone(),
                                         tokens: map(), gotos: map() };

            // group the items that we can transition into by shifting
            // over a term or nonterm
            let transitions: Multimap<Symbol, Item<'grammar>> =
                items.vec
                     .iter()
                     .filter_map(|item| item.shifted_item())
                     .collect();

            for (symbol, items) in transitions.into_iter() {
                let items = self.transitive_closure(items);
                // `add_state` deduplicates: returns the existing index if
                // this item set was already enqueued
                let next_state = kernel_set.add_state(items);

                match symbol {
                    Symbol::Terminal(s) => {
                        let action = Action::Shift(next_state);
                        let prev = this_state.tokens.insert(Lookahead::Terminal(s), action);
                        assert!(prev.is_none()); // cannot have a shift/shift conflict
                    }

                    Symbol::Nonterminal(s) => {
                        let prev = this_state.gotos.insert(s, next_state);
                        assert!(prev.is_none());
                    }
                }
            }

            // finally, consider the reductions
            for item in items.vec.iter().filter(|i| i.can_reduce()) {
                let action = Action::Reduce(item.production);
                let prev = this_state.tokens.insert(item.lookahead, action);
                if let Some(conflict) = prev {
                    // a shift or another reduce already claimed this
                    // lookahead: report it rather than choose arbitrarily
                    return Err(TableConstructionError {
                        items: items.clone(),
                        lookahead: item.lookahead,
                        production: item.production,
                        conflict: conflict,
                    });
                }
            }

            // extract a new state
            states.push(this_state);
        }

        Ok(states)
    }

    /// Builds one item per production of the nonterminal `id`, each with
    /// its dot at `index` and the given `lookahead`.
    fn items(&self,
             id: NonterminalString,
             index: usize,
             lookahead: Lookahead)
             -> Vec<Item<'grammar>>
    {
        self.grammar.productions_for(id)
                    .iter()
                    .map(|production| {
                        debug_assert!(index <= production.symbols.len());
                        Item { production: production,
                               index: index,
                               lookahead: lookahead }
                    })
                    .collect()
    }

    // expands `state` with epsilon moves
    fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
                          -> Items<'grammar>
    {
        let mut counter = 0;

        // `set` tracks items ever seen, so each item is expanded at most once
        let mut set: Set<Item<'grammar>> =
            items.iter().cloned().collect();

        // fixed point: each pass only closes over the items appended by
        // the previous pass (`items[counter..]`)
        while counter < items.len() {
            let new_items: Vec<_> =
                items[counter..]
                    .iter()
                    .filter_map(|item| {
                        let shift_symbol = item.shift_symbol();
                        match shift_symbol {
                            None => None, // requires a reduce
                            Some((Symbol::Terminal(_), _)) => None, // requires a shift
                            Some((Symbol::Nonterminal(nt), remainder)) => {
                                Some((nt, remainder, item.lookahead))
                            }
                        }
                    })
                    .flat_map(|(nt, remainder, lookahead)| {
                        // lookaheads of the new items come from
                        // FIRST(remainder ++ lookahead)
                        let first_set = self.first_sets.first(remainder, lookahead);
                        first_set.into_iter()
                                 .flat_map(move |l| self.items(nt, 0, l))
                    })
                    .filter(|&item| set.insert(item)) // keep only unseen items
                    .collect();

            counter = items.len();
            items.extend(new_items);
        }

        // sort + dedup so equal item sets compare/hash equal in the kernel set
        items.sort();
        items.dedup();
        Items { vec: Rc::new(items) }
    }
}

View File

@ -2,9 +2,10 @@ use intern::intern;
use generate; use generate;
use grammar::repr::*; use grammar::repr::*;
use test_util::{compare, expect_debug, normalized_grammar}; use test_util::{compare, expect_debug, normalized_grammar};
use super::{build_states, State, Items, Lookahead, LR1}; use lr1::Lookahead::EOF;
use super::Lookahead::EOF; use lr1::interpret::interpret;
use super::interpret::interpret; use lr1::core::{LR1, build_lr1_states};
use lr1::{State, Items, Lookahead};
fn nt(t: &str) -> NonterminalString { fn nt(t: &str) -> NonterminalString {
NonterminalString(intern(t)) NonterminalString(intern(t))
@ -12,7 +13,7 @@ fn nt(t: &str) -> NonterminalString {
const ITERATIONS: usize = 22; const ITERATIONS: usize = 22;
fn random_test(grammar: &Grammar, states: &[State], start_symbol: NonterminalString) { fn random_test<'g>(grammar: &Grammar, states: &'g [State<'g>], start_symbol: NonterminalString) {
for i in 0..ITERATIONS { for i in 0..ITERATIONS {
let input_tree = generate::random_parse_tree(grammar, start_symbol); let input_tree = generate::random_parse_tree(grammar, start_symbol);
let output_tree = interpret(&states, input_tree.terminals()).unwrap(); let output_tree = interpret(&states, input_tree.terminals()).unwrap();
@ -117,7 +118,7 @@ grammar;
// for now, just test that process does not result in an error // for now, just test that process does not result in an error
// and yields expected number of states. // and yields expected number of states.
let states = build_states(&grammar, nt("S")).unwrap(); let states = build_lr1_states(&grammar, nt("S")).unwrap();
assert_eq!(states.len(), 16); assert_eq!(states.len(), 16);
// execute it on some sample inputs. // execute it on some sample inputs.
@ -185,5 +186,5 @@ fn shift_reduce_conflict1() {
}; };
"#); "#);
assert!(build_states(&grammar, nt("E")).is_err()); assert!(build_lr1_states(&grammar, nt("E")).is_err());
} }

View File

@ -7,27 +7,31 @@ use std::iter::IntoIterator;
use std::fmt::{Debug, Display, Formatter, Error}; use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep; use util::Sep;
pub fn interpret<TOKENS>(states: &[State], tokens: TOKENS) -> Result<ParseTree, ()> pub type InterpretError<'grammar> = (&'grammar State<'grammar>, Lookahead);
pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
-> Result<ParseTree, InterpretError<'grammar>>
where TOKENS: IntoIterator<Item=TerminalString> where TOKENS: IntoIterator<Item=TerminalString>
{ {
let mut m = Machine::new(states); let mut m = Machine::new(states);
m.execute(tokens.into_iter()) m.execute(tokens.into_iter())
} }
struct Machine<'states, 'grammar:'states> { struct Machine<'grammar> {
states: &'states [State<'grammar>], states: &'grammar [State<'grammar>],
state_stack: Vec<&'states State<'grammar>>, state_stack: Vec<&'grammar State<'grammar>>,
data_stack: Vec<ParseTree>, data_stack: Vec<ParseTree>,
} }
impl<'states, 'grammar> Machine<'states, 'grammar> { impl<'grammar> Machine<'grammar> {
fn new(states: &'states [State<'grammar>]) -> Machine<'states,'grammar> { fn new(states: &'grammar [State<'grammar>]) -> Machine<'grammar> {
Machine { states: states, Machine { states: states,
state_stack: vec![], state_stack: vec![],
data_stack: vec![] } data_stack: vec![] }
} }
fn execute<TOKENS>(&mut self, mut tokens: TOKENS) -> Result<ParseTree, ()> fn execute<TOKENS>(&mut self, mut tokens: TOKENS)
-> Result<ParseTree, InterpretError<'grammar>>
where TOKENS: Iterator<Item=TerminalString> where TOKENS: Iterator<Item=TerminalString>
{ {
assert!(self.state_stack.is_empty()); assert!(self.state_stack.is_empty());
@ -41,7 +45,7 @@ impl<'states, 'grammar> Machine<'states, 'grammar> {
// check whether we can shift this token // check whether we can shift this token
match state.tokens.get(&Lookahead::Terminal(terminal)) { match state.tokens.get(&Lookahead::Terminal(terminal)) {
None => { return Err(()); } None => { return Err((state, Lookahead::Terminal(terminal))); }
Some(&Action::Shift(next_index)) => { Some(&Action::Shift(next_index)) => {
self.data_stack.push(ParseTree::Terminal(terminal)); self.data_stack.push(ParseTree::Terminal(terminal));
@ -60,7 +64,7 @@ impl<'states, 'grammar> Machine<'states, 'grammar> {
loop { loop {
let state = *self.state_stack.last().unwrap(); let state = *self.state_stack.last().unwrap();
match state.tokens.get(&Lookahead::EOF) { match state.tokens.get(&Lookahead::EOF) {
None => { return Err(()); } None => { return Err((state, Lookahead::EOF)); }
Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") } Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") }
Some(&Action::Reduce(production)) => { Some(&Action::Reduce(production)) => {
if !self.reduce(production) { if !self.reduce(production) {

151
lalrpop/src/lr1/la0/mod.rs Normal file
View File

@ -0,0 +1,151 @@
//! Mega naive LALR(1) generation algorithm.
use lr1::core;
use grammar::repr::*;
use std::rc::Rc;
use util::{map, Map};
use itertools::Itertools;
use std::collections::hash_map::Entry;
use super::{Action, State, StateIndex, Item, Items, Lookahead, TableConstructionError};
use super::Action::{Reduce, Shift};
#[cfg(test)]
mod test;
/// Projection of an LR(1) `Item` with the lookahead dropped. LR(1) states
/// whose item sets agree under this projection share an LR(0) kernel and
/// are merged into a single LALR(1) state.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct LR0Item<'grammar> {
    production: &'grammar Production,
    index: usize
}
// Intermediate LALR(1) state. Identical to an LR(1) state, but that
// the items can be pushed to. We initially create these with an empty
// set of actions, as well; the actions are filled in by a second pass
// once all LR(1) states have been assigned to their merged state.
struct LALR1State<'grammar> {
    index: StateIndex,
    // union of the items of every LR(1) state collapsed into this one
    items: Vec<Item<'grammar>>,
    tokens: Map<Lookahead, Action<'grammar>>,
    gotos: Map<NonterminalString, StateIndex>,
}
pub fn lalr_states<'grammar>(grammar: &'grammar Grammar,
start: NonterminalString)
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
{
// First build the LR(1) states
let lr_states = try!(core::build_lr1_states(grammar, start));
collapse_to_lalr_states(&lr_states)
}
/// Compresses a set of LR(1) states into LALR(1) states by merging states
/// that share the same LR(0) kernel (items ignoring lookahead).
///
/// Errors if two merged states contribute different actions for the same
/// lookahead — the signature of a grammar that is LR(1) but not LALR(1).
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[State<'grammar>])
                                         -> Result<Vec<State<'grammar>>,
                                                   TableConstructionError<'grammar>>
{
    // Now compress them. This vector stores, for each state, the
    // LALR(1) state to which we will remap it.
    let mut remap: Vec<_> = (0..lr_states.len()).map(|_| StateIndex(0)).collect();
    let mut lalr1_map: Map<Vec<LR0Item>, StateIndex> = map();
    let mut lalr1_states: Vec<LALR1State> = vec![];

    // Pass 1: group LR(1) states by LR(0) kernel, allocating one LALR(1)
    // state per distinct kernel and pooling the items of every LR(1)
    // state that maps to it.
    for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
        // NOTE(review): `dedup()` removes only *adjacent* duplicates; this
        // relies on `items.vec` being kept sorted by the LR(1) builder so
        // that items sharing an LR(0) core are adjacent — confirm if the
        // item ordering ever changes.
        let lr0_kernel: Vec<_> =
            lr1_state.items.vec.iter()
                               .map(|item| LR0Item {
                                   production: item.production,
                                   index: item.index,
                               })
                               .dedup()
                               .collect();

        let lalr1_index =
            *lalr1_map.entry(lr0_kernel)
                      .or_insert_with(|| {
                          // first time we see this kernel: allocate a fresh,
                          // empty LALR(1) state for it
                          let index = StateIndex(lalr1_states.len());
                          lalr1_states.push(LALR1State {
                              index: index,
                              items: vec![],
                              tokens: map(),
                              gotos: map()
                          });
                          index
                      });

        lalr1_states[lalr1_index.0].items.extend(
            lr1_state.items.vec.iter().cloned());

        remap[lr1_index] = lalr1_index;
    }

    // Pass 2: now that items are fully built, create the actions,
    // translating every shift/goto target through `remap`.
    for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
        let lalr1_index = remap[lr1_index];
        let lalr1_state = &mut lalr1_states[lalr1_index.0];

        for (&lookahead, &lr1_action) in &lr1_state.tokens {
            let lalr1_action = match lr1_action {
                Action::Shift(state) => Action::Shift(remap[state.0]),
                Action::Reduce(prod) => Action::Reduce(prod),
            };

            match lalr1_state.tokens.entry(lookahead) {
                Entry::Occupied(slot) => {
                    // two source states disagree on this lookahead: a
                    // conflict introduced by the merge itself
                    let old_action = *slot.get();
                    if old_action != lalr1_action {
                        return Err(conflict(&lalr1_state.items, lookahead,
                                            old_action, lalr1_action));
                    }
                }
                Entry::Vacant(slot) => {
                    slot.insert(lalr1_action);
                }
            }
        }

        for (&nt, &lr1_dest) in &lr1_state.gotos {
            let lalr1_dest = remap[lr1_dest.0];
            match lalr1_state.gotos.entry(nt) {
                Entry::Occupied(slot) => {
                    // gotos from merged states must agree, since the target
                    // kernels are determined by the shared LR(0) kernel
                    let old_dest = *slot.get();
                    assert_eq!(old_dest, lalr1_dest);
                }
                Entry::Vacant(slot) => {
                    slot.insert(lalr1_dest);
                }
            }
        }
    }

    // Finally, create the new states
    Ok(
        lalr1_states.into_iter()
                    .map(|lr| State {
                        index: lr.index,
                        items: Items { vec: Rc::new(lr.items) },
                        tokens: lr.tokens,
                        gotos: lr.gotos
                    })
                    .collect())
}
/// Builds the `TableConstructionError` reported when two distinct actions
/// land on the same (state, lookahead) slot during the LALR(1) merge.
/// By convention `production` holds a reduction and `conflict` the other
/// action; merging never produces shift/shift conflicts, so any other
/// pairing is a bug.
fn conflict<'grammar>(items: &[Item<'grammar>],
                      lookahead: Lookahead,
                      action1: Action<'grammar>,
                      action2: Action<'grammar>)
                      -> TableConstructionError<'grammar> {
    match (action1, action2) {
        (c @ Shift(_), Reduce(p)) |
        (Reduce(p), c @ Shift(_)) |
        (Reduce(p), c @ Reduce(_)) => {
            TableConstructionError {
                items: Items { vec: Rc::new(items.to_vec()) },
                lookahead: lookahead,
                production: p,
                conflict: c,
            }
        }
        _ => panic!("conflict between {:?} and {:?}", action1, action2),
    }
}

View File

@ -0,0 +1,41 @@
use intern::intern;
use generate;
use grammar::repr::*;
use test_util::{compare, expect_debug, normalized_grammar};
use super::lalr_states;
use super::super::interpret::interpret;
/// Shorthand for building a `NonterminalString` from a literal name.
fn nt(t: &str) -> NonterminalString {
    NonterminalString(intern(t))
}
/// Builds an iterator of interned `TerminalString`s from string literals,
/// e.g. `tokens!["N", "-", "N"]`.
macro_rules! tokens {
    ($($x:expr),*) => {
        vec![$(TerminalString(intern($x))),*].into_iter()
    }
}
// Left-recursive expression grammar exercising the LALR(1) collapse; the
// name suggests it is "figure 9.23" from a compilers textbook — TODO
// confirm which. The grammar is LALR(1), so `lalr_states` must succeed,
// and the interpreted parse must respect left associativity of "-".
#[test]
fn figure9_23() {
    let grammar = normalized_grammar(r#"
grammar;
extern token { enum Tok { } }
S: () = E => ();
E: () = {
E "-" T => ();
T => ();
};
T: () = {
"N" => ();
"(" E ")" => ();
};
"#);

    let states = lalr_states(&grammar, nt("S")).unwrap();
    println!("{:#?}", states);

    // run the table interpreter on N - (N - N) and check the parse tree
    let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap();
    assert_eq!(
        &format!("{:?}", tree)[..],
        r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#);
}

View File

@ -4,23 +4,19 @@ use kernel_set;
use grammar::repr::*; use grammar::repr::*;
use std::fmt::{Debug, Formatter, Error}; use std::fmt::{Debug, Formatter, Error};
use std::rc::Rc; use std::rc::Rc;
use util::{map, Map, Multimap, Set, Prefix}; use util::{Map, Prefix};
pub mod ascent; pub mod ascent;
mod core;
mod error; mod error;
mod first; mod first;
mod la0;
#[cfg(test)] mod interpret; #[cfg(test)] mod interpret;
#[cfg(test)] mod test;
pub use self::error::report_error; pub use self::error::report_error;
struct LR1<'grammar> {
grammar: &'grammar Grammar,
first_sets: first::FirstSets,
}
#[derive(Debug)] #[derive(Debug)]
pub struct State<'grammar> { pub struct State<'grammar> {
index: StateIndex, index: StateIndex,
@ -29,7 +25,7 @@ pub struct State<'grammar> {
gotos: Map<NonterminalString, StateIndex>, gotos: Map<NonterminalString, StateIndex>,
} }
#[derive(Debug)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Action<'grammar> { enum Action<'grammar> {
Shift(StateIndex), Shift(StateIndex),
Reduce(&'grammar Production), Reduce(&'grammar Production),
@ -75,137 +71,9 @@ pub fn build_states<'grammar>(grammar: &'grammar Grammar,
start: NonterminalString) start: NonterminalString)
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>> -> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
{ {
let lr1 = LR1::new(grammar); match grammar.algorithm {
lr1.build_states(start) Algorithm::LR1 => core::build_lr1_states(grammar, start),
} Algorithm::LALR1 => la0::lalr_states(grammar, start),
impl<'grammar> LR1<'grammar> {
fn new(grammar: &'grammar Grammar) -> LR1 {
LR1 {
grammar: grammar,
first_sets: first::FirstSets::new(grammar),
}
}
fn build_states(&self, start_nt: NonterminalString)
-> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
{
let mut kernel_set = kernel_set::KernelSet::new();
let mut states = vec![];
// create the starting state
kernel_set.add_state(
self.transitive_closure(
self.items(start_nt, 0, Lookahead::EOF)));
while let Some(items) = kernel_set.next() {
let index = StateIndex(states.len());
let mut this_state = State { index: index, items: items.clone(),
tokens: map(), gotos: map() };
// group the items that we can transition into by shifting
// over a term or nonterm
let transitions: Multimap<Symbol, Item<'grammar>> =
items.vec
.iter()
.filter_map(|item| item.shifted_item())
.collect();
for (symbol, items) in transitions.into_iter() {
let items = self.transitive_closure(items);
let next_state = kernel_set.add_state(items);
match symbol {
Symbol::Terminal(s) => {
let action = Action::Shift(next_state);
let prev = this_state.tokens.insert(Lookahead::Terminal(s), action);
assert!(prev.is_none()); // cannot have a shift/shift conflict
}
Symbol::Nonterminal(s) => {
let prev = this_state.gotos.insert(s, next_state);
assert!(prev.is_none());
}
}
}
// finally, consider the reductions
for item in items.vec.iter().filter(|i| i.can_reduce()) {
let action = Action::Reduce(item.production);
let prev = this_state.tokens.insert(item.lookahead, action);
if let Some(conflict) = prev {
return Err(TableConstructionError {
items: items.clone(),
lookahead: item.lookahead,
production: item.production,
conflict: conflict,
});
}
}
// extract a new state
states.push(this_state);
}
Ok(states)
}
fn items(&self,
id: NonterminalString,
index: usize,
lookahead: Lookahead)
-> Vec<Item<'grammar>>
{
self.grammar.productions_for(id)
.iter()
.map(|production| {
debug_assert!(index <= production.symbols.len());
Item { production: production,
index: index,
lookahead: lookahead }
})
.collect()
}
// expands `state` with epsilon moves
fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
-> Items<'grammar>
{
let mut counter = 0;
let mut set: Set<Item<'grammar>> =
items.iter().cloned().collect();
while counter < items.len() {
let new_items: Vec<_> =
items[counter..]
.iter()
.filter_map(|item| {
let shift_symbol = item.shift_symbol();
match shift_symbol {
None => None, // requires a reduce
Some((Symbol::Terminal(_), _)) => None, // requires a shift
Some((Symbol::Nonterminal(nt), remainder)) => {
Some((nt, remainder, item.lookahead))
}
}
})
.flat_map(|(nt, remainder, lookahead)| {
let first_set = self.first_sets.first(remainder, lookahead);
first_set.into_iter()
.flat_map(move |l| self.items(nt, 0, l))
})
.filter(|&item| set.insert(item))
.collect();
counter = items.len();
items.extend(new_items);
}
items.sort();
items.dedup();
Items { vec: Rc::new(items) }
} }
} }

View File

@ -88,6 +88,12 @@ impl LowerState {
.map(|p| r::Parameter { name: p.name, ty: p.ty.type_repr() }) .map(|p| r::Parameter { name: p.name, ty: p.ty.type_repr() })
.collect(); .collect();
let algorithm =
match grammar.algorithm {
None => r::Algorithm::LR1,
Some(ref a) => r::Algorithm::from_str(a.text).unwrap(),
};
Ok(r::Grammar { Ok(r::Grammar {
prefix: self.prefix, prefix: self.prefix,
start_nonterminals: start_symbols, start_nonterminals: start_symbols,
@ -100,6 +106,7 @@ impl LowerState {
type_parameters: grammar.type_parameters, type_parameters: grammar.type_parameters,
parameters: parameters, parameters: parameters,
where_clauses: grammar.where_clauses, where_clauses: grammar.where_clauses,
algorithm: algorithm
}) })
} }

View File

@ -4,6 +4,7 @@ use super::{NormResult, NormError};
use super::norm_util::{self, Symbols}; use super::norm_util::{self, Symbols};
use grammar::parse_tree::*; use grammar::parse_tree::*;
use grammar::repr;
use intern::{intern, InternedString}; use intern::{intern, InternedString};
use util::{Map, Multimap, Sep, set}; use util::{Map, Multimap, Sep, set};
@ -55,6 +56,17 @@ struct ScopeChain<'scope> {
impl<'grammar> Validator<'grammar> { impl<'grammar> Validator<'grammar> {
fn validate(&self) -> NormResult<()> { fn validate(&self) -> NormResult<()> {
if let Some(ref algorithm) = self.grammar.algorithm {
match repr::Algorithm::from_str(algorithm.text) {
Some(_) => { }
None => {
return_err!(
algorithm.span,
"unrecognized algorithm `{}`", algorithm.text);
}
}
}
for item in &self.grammar.items { for item in &self.grammar.items {
match *item { match *item {
GrammarItem::Use(..) => { } GrammarItem::Use(..) => { }

View File

@ -12,6 +12,7 @@ pub Grammar: Grammar =
<tps:GrammarTypeParameters?> <tps:GrammarTypeParameters?>
<parameters:GrammarParameters?> <parameters:GrammarParameters?>
<where_clauses:"where"?> <where_clauses:"where"?>
<algorithm:Algorithm?>
";" ";"
<items:GrammarItem*> => { <items:GrammarItem*> => {
let where_clauses = let where_clauses =
@ -25,7 +26,16 @@ pub Grammar: Grammar =
type_parameters: tps.unwrap_or(vec![]), type_parameters: tps.unwrap_or(vec![]),
parameters: parameters.unwrap_or(vec![]), parameters: parameters.unwrap_or(vec![]),
where_clauses: where_clauses, where_clauses: where_clauses,
items: uses.into_iter().chain(items).collect() } items: uses.into_iter().chain(items).collect(),
algorithm: algorithm }
};
Algorithm: Algorithm =
"[" <lo:@L> <t:StringLiteral> <hi:@R> "]" => {
Algorithm {
span: Span(lo, hi),
text: t
}
}; };
GrammarTypeParameters: Vec<TypeParameter> = GrammarTypeParameters: Vec<TypeParameter> =