diff --git a/lalrpop-test/build.rs b/lalrpop-test/build.rs index 705c09b..c1cebd4 100644 --- a/lalrpop-test/build.rs +++ b/lalrpop-test/build.rs @@ -5,6 +5,7 @@ fn main() { .emit_comments(true) .force_build(true) .unit_test() + .log_debug() .process_current_dir() .unwrap(); } diff --git a/lalrpop-test/src/associated_types.lalrpop b/lalrpop-test/src/associated_types.lalrpop index b1cc65c..182798c 100644 --- a/lalrpop-test/src/associated_types.lalrpop +++ b/lalrpop-test/src/associated_types.lalrpop @@ -1,7 +1,8 @@ use std::str::FromStr; use associated_types_lib::ParseCallbacks; -grammar

(callbacks: &mut P) where P: ParseCallbacks; +// FIXME: the 'a shouldn't be needed +grammar<'a, P>(callbacks: &'a mut P) where P: ParseCallbacks, P: 'a; pub Term: P::Term = { => n.into(), diff --git a/lalrpop-test/src/where_clause_with_forall.lalrpop b/lalrpop-test/src/where_clause_with_forall.lalrpop index dd5317f..cb12cfa 100644 --- a/lalrpop-test/src/where_clause_with_forall.lalrpop +++ b/lalrpop-test/src/where_clause_with_forall.lalrpop @@ -1,6 +1,7 @@ use std::str::FromStr; -grammar(logger: &mut F) where F: for<'a> FnMut(&'a str); +// FIXME: 'logger shouldn't be needed +grammar<'logger, F>(logger: &'logger mut F) where F: for<'a> FnMut(&'a str) + 'logger; pub Term: i32 = { => { diff --git a/lalrpop-util/src/state_machine.rs b/lalrpop-util/src/state_machine.rs index d886d85..809018d 100644 --- a/lalrpop-util/src/state_machine.rs +++ b/lalrpop-util/src/state_machine.rs @@ -147,9 +147,14 @@ pub trait ParserAction: Copy + Clone + Debug { fn is_error(self) -> bool; } -pub struct SimulatedReduce { - states_to_pop: usize, - nonterminal_produced: D::NonterminalIndex, +pub enum SimulatedReduce { + Reduce { + states_to_pop: usize, + nonterminal_produced: D::NonterminalIndex, + }, + + // This reduce is the "start" fn, so the parse is done. + Accept, } // These aliases are an elaborate hack to get around @@ -492,15 +497,22 @@ where // If we encounter a reduce action, we need to simulate its // effect on the state stack. if let Some(reduce_action) = action.as_reduce() { - let SimulatedReduce { - states_to_pop, - nonterminal_produced, - } = self.definition.simulate_reduce(reduce_action); - states_len -= states_to_pop; - states.truncate(states_len); - let top = states[states_len - 1]; - let next_state = self.definition.goto(top, nonterminal_produced); - states.push(next_state); + match self.definition.simulate_reduce(reduce_action) { + SimulatedReduce::Reduce { + states_to_pop, + nonterminal_produced, + } => { + states_len -= states_to_pop; + states.truncate(states_len); + let top = states[states_len - 1]; + let next_state = self.definition.goto(top, nonterminal_produced); + states.push(next_state); + } + + SimulatedReduce::Accept => { + return true; + } + } } else { // If we encounter a shift action, we DO accept. assert!(action.is_shift()); diff --git a/lalrpop/src/grammar/free_variables/mod.rs b/lalrpop/src/grammar/free_variables/mod.rs index d4f5f80..c50c943 100644 --- a/lalrpop/src/grammar/free_variables/mod.rs +++ b/lalrpop/src/grammar/free_variables/mod.rs @@ -152,24 +152,20 @@ impl FreeVariables for parse_tree::TypeBound { parse_tree::TypeBound::Lifetime(l) => free_lifetime(type_parameters, l), parse_tree::TypeBound::Fn { forall, - path, + path: _, parameters, ret, - } => path - .free_variables(type_parameters) + } => parameters.free_variables(type_parameters) .into_iter() - .chain(parameters.free_variables(type_parameters)) .chain(ret.free_variables(type_parameters)) .filter(|tp| !forall.contains(tp)) .collect(), parse_tree::TypeBound::Trait { forall, - path, + path: _, parameters, - } => path - .free_variables(type_parameters) + } => parameters.free_variables(type_parameters) .into_iter() - .chain(parameters.free_variables(type_parameters)) .filter(|tp| !forall.contains(tp)) .collect(), } diff --git a/lalrpop/src/grammar/repr.rs b/lalrpop/src/grammar/repr.rs index fa9deb6..6d45557 100644 --- a/lalrpop/src/grammar/repr.rs +++ b/lalrpop/src/grammar/repr.rs @@ -14,7 +14,7 @@ use util::Sep; // These concepts we re-use wholesale pub use grammar::parse_tree::{ Annotation, InternToken, Lifetime, NonterminalString, Path, Span, TerminalLiteral, - TerminalString, TypeBound, TypeBoundParameter, TypeParameter, Visibility, + TerminalString, TypeBound, TypeParameter, Visibility, }; #[derive(Clone, Debug)] diff --git a/lalrpop/src/lr1/codegen/base.rs b/lalrpop/src/lr1/codegen/base.rs index a9a4da9..8b13595 100644 --- a/lalrpop/src/lr1/codegen/base.rs +++ b/lalrpop/src/lr1/codegen/base.rs @@ -347,10 +347,17 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> { /// all type parameters are constrained, even if they are not /// used. pub fn phantom_data_type(&self) -> String { - format!( - "::std::marker::PhantomData<({})>", - Sep(", ", &self.grammar.non_lifetime_type_parameters()) - ) + let phantom_bits: Vec<_> = self + .grammar + .type_parameters + .iter() + .map(|tp| match *tp { + TypeParameter::Lifetime(ref l) => format!("&{} ()", l), + + TypeParameter::Id(ref id) => id.to_string(), + }) + .collect(); + format!("::std::marker::PhantomData<({})>", Sep(", ", &phantom_bits),) } /// Returns expression that captures the user-declared type @@ -358,9 +365,19 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> { /// all type parameters are constrained, even if they are not /// used. pub fn phantom_data_expr(&self) -> String { + let phantom_bits: Vec<_> = self + .grammar + .type_parameters + .iter() + .map(|tp| match *tp { + TypeParameter::Lifetime(_) => format!("&()"), + + TypeParameter::Id(ref id) => id.to_string(), + }) + .collect(); format!( "::std::marker::PhantomData::<({})>", - Sep(", ", &self.grammar.non_lifetime_type_parameters()) + Sep(", ", &phantom_bits), ) } } diff --git a/lalrpop/src/lr1/codegen/parse_table.rs b/lalrpop/src/lr1/codegen/parse_table.rs index 799a47a..dadb6c6 100644 --- a/lalrpop/src/lr1/codegen/parse_table.rs +++ b/lalrpop/src/lr1/codegen/parse_table.rs @@ -34,222 +34,6 @@ pub fn compile<'grammar, W: Write>( table_driven.write() } -// We create three parse tables: -// -// - `ACTION[state * num_states + terminal]: i32`: given a state and next token, -// yields an integer indicating whether to shift/reduce (see below) -// - `EOF_ACTION[state]: i32`: as above, but for the EOF token -// - `GOTO[state * num_states + nonterminal]: i32`: index + 1 of state to jump to when given -// nonterminal is pushed (no error is possible) -// -// For the `ACTION` and `EOF_ACTION` tables, the value is an `i32` and -// its interpretation varies depending on whether it is positive or -// negative: -// -// - if zero, parse error. -// - if a positive integer (not zero), it is the next state to shift to. -// - if a negative integer (not zero), it is the index of a reduction -// action to execute (actually index + 1). -// -// We maintain two stacks: one is a stack of state indexes (each an -// u32). The other is a stack of values and spans: `(L, T, L)`. `L` is -// the location type and represents the start/end span. `T` is the -// value of the symbol. The type `T` is an `enum` that we synthesize -// which contains a variant for all the possibilities: -// -// ``` -// enum Value<> { -// // One variant for each terminal: -// Term1(Ty1), -// ... -// TermN(TyN), -// -// // One variant for each nonterminal: -// Nt1(Ty1), -// ... -// NtN(TyN), -// } -// ``` -// -// The action parser function looks like this (pseudo-code): -// -// ``` -// fn parse_fn(tokens: TOKENS) -> Result -// where TOKENS: Iterator> -// { -// let mut states = vec![0]; // initial state is zero -// let mut symbols = vec![]; -// 'shift: loop { -// // Code to shift the next symbol and determine which terminal -// // it is; emitted by `shift_symbol()`. -// let lookahead = match tokens.next() { -// Some(Ok(l)) => l, -// None => break 'shift, -// Some(Err(e)) => return Err(e), -// }; -// let integer = match lookahead { -// (_, PatternForTerminal0(...), _) => 0, -// ... -// }; -// -// // Code to process next symbol. -// 'inner: loop { -// let symbol = match lookahead { -// (l, PatternForTerminal0(...), r) => { -// (l, Value::VariantForTerminal0(...), r), -// } -// ... -// }; -// let state = *states.last().unwrap() as usize; -// let action = ACTION[state * NUM_STATES + integer]; -// if action > 0 { // shift -// states.push(action - 1); -// symbols.push(symbol); -// continue 'shift; -// } else if action < 0 { // reduce -// if let Some(r) = reduce(action, Some(&lookahead.0), &mut states, &mut symbols) { -// // Give errors from within grammar a higher priority -// if r.is_err() { -// return r; -// } -// return Err(lalrpop_util::ParseError::ExtraToken { token: lookahead }); -// } -// } else { -// // Error recovery code: emitted by `try_error_recovery` -// let mut err_lookahead = Some(lookahead); -// let mut err_integer = Some(integer); -// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location, -// &mut err_lookahead, &mut err_integer) { -// Err(e) => return e, -// Ok(Some(v)) => return Ok(v), -// Ok(None) => { } -// } -// match (err_lookahead, err_integer) { -// (Some(l), Some(i)) => { -// lookahead = l; -// integer = i; -// continue 'inner; -// } -// _ => break 'shift; -// } -// } -// } -// } -// -// // Process EOF -// while let Some(state) = self.states.pop() { -// let action = EOF_ACTION[state * NUM_STATES]; -// if action < 0 { // reduce -// try!(reduce(action, None, &mut states, &mut symbols)); -// } else { -// let mut err_lookahead = None; -// let mut err_integer = None; -// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location, -// &mut err_lookahead, &mut err_integer) { -// Err(e) => return e, -// Ok(Some(v)) => return Ok(v), -// Ok(None) => { } -// } -// } -// } -// } -// -// // generated by `emit_reduce_actions()` -// fn reduce(action: i32, lookahead_start: Option<&L>, -// states: &mut Vec, symbols: &mut Vec<(L, Symbol, L)) -// -> Option> { -// let nonterminal = match -action { -// 0 => { -// // Execute reduce action 0 to produce nonterminal N, popping from stacks etc -// // (generated by `emit_reduce_action()`). If this is a fallible action, -// // it may return `Some(Err)`, and if this is a reduce of the start NT, -// // it may return `Some(Ok)`. -// states.pop(); // however many times -// symbols.pop(); // however many times -// let data = action_fn0(...); -// symbols.push((l, Value::VariantForNonterminalN(data), r)); -// N -// } -// ... -// }; -// let state = *states.last().unwrap(); -// let next_state = GOTO[state * NUM_STATES + nonterminal] - 1; -// state_stack.push(next_state); -// None -// } -// -// generated by `write_error_recovery_fn` -// fn error_recovery(...) { -// let mut dropped_tokens = vec![]; -// -// // First, reduce as long as we can with the `!` token as lookahead -// loop { -// let state = *states.last().unwrap() as usize; -// let action = ACTION[(state + 1) * ACTIONS_PER_STATE - 1]; -// if action >= 0 { -// break; -// } -// if let Some(r) = reduce(action, None, &mut states, &mut symbols) { -// return r; -// } -// } -// -// let top0; -// 'find_state: loop { -// // See if there is a state that can shift `!` token. If so, -// // break. -// for top in (0..states.len()).rev() { -// let state = states[top]; -// let action = ACTION[state * ACTIONS_PER_STATE + 1]; -// if action <= 0 { continue; } -// let error_state = action - 1; -// if accepts(error_state, &states[..top+1], *opt-integer) { -// top0 = top; -// break 'find_state; -// } -// } -// -// // Else, drop a token from the input and try again. -// 'eof: loop { -// match opt_lookahead.take() { -// None => { -// // No more tokens to drop -// return Err(...); -// } -// Some(mut lookahead) => { -// dropped_tokens.push(lookahead); -// next_token() -// opt_lookahead = Some(match tokens.next() { -// Some(Ok(l)) => l, -// None => break 'eof, -// Some(Err(e)) => return Err(e), -// }); -// opt_integer = Some(match lookahead { -// (_, PatternForTerminal0(...), _) => 0, -// ... -// }); -// continue 'find_state; -// } -// } -// } -// opt_lookahead = None; -// opt_integer = None; -// } -// -// let top = top0; -// let start = /* figure out "start" of error */; -// let end = /* figure out "end" of error */; -// states.truncate(top + 1); -// symbols.truncate(top); -// let recover_state = states[top]; -// let error_state = ACTION[recover_state * ACTIONS_PER_STATE + 1] - 1; -// states.push(error_state); -// let recovery = ErrorRecovery { dropped_tokens, ... }; -// symbols.push((start, Symbol::Termerror(recovery), end)); -// Ok(None) -// } -// ``` - enum Comment<'a, T> { Goto(T, usize), Error(T), @@ -354,6 +138,10 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive self.write_parse_mod(|this| { try!(this.write_value_type_defn()); try!(this.write_parse_table()); + try!(this.write_machine_definition()); + try!(this.write_token_to_integer_fn()); + try!(this.write_token_to_symbol_fn()); + try!(this.write_simulate_reduce_fn()); try!(this.write_parser_fn()); try!(this.write_error_recovery_fn()); try!(this.write_accepts_fn()); @@ -364,6 +152,241 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive }) } + fn write_machine_definition(&mut self) -> io::Result<()> { + let grammar_type_params = Sep(", ", &self.grammar.type_parameters); + let grammar_where_clauses = Sep(", ", &self.grammar.where_clauses); + // let parse_error_type = self.types.parse_error_type(); + let error_type = self.types.error_type(); + let token_type = self.types.terminal_token_type(); + // let spanned_symbol_type = self.spanned_symbol_type(); + // let triple_type = self.types.triple_type(); + let loc_type = self.types.terminal_loc_type(); + // let actions_per_state = self.grammar.terminals.all.len(); + let start_type = self.types.nonterminal_type(&self.start_symbol); + let state_type = self.custom.state_type; + let symbol_type = self.symbol_type(); + let phantom_data_type = self.phantom_data_type(); + let phantom_data_expr = self.phantom_data_expr(); + + rust!( + self.out, + "pub struct {p}StateMachine<{gtp}>", + p = self.prefix, + gtp = grammar_type_params + ); + rust!(self.out, "where {gwc}", gwc = grammar_where_clauses); + rust!(self.out, "{{"); + for param in &self.grammar.parameters { + rust!(self.out, "{name}: {ty},", name = param.name, ty = param.ty,); + } + rust!( + self.out, + "{p}phantom: {phantom},", + p = self.prefix, + phantom = phantom_data_type, + ); + rust!(self.out, "}}"); + + rust!( + self.out, + "impl<{gtp}> {p}state_machine::ParserDefinition for {p}StateMachine<{gtp}>", + p = self.prefix, + gtp = grammar_type_params, + ); + rust!(self.out, "where {gwc}", gwc = grammar_where_clauses); + rust!(self.out, "{{"); + rust!(self.out, "type Location = {t};", t = loc_type); + rust!(self.out, "type Error = {t};", t = error_type); + rust!(self.out, "type Token = {t};", t = token_type); + rust!(self.out, "type TokenIndex = usize;"); + rust!( + self.out, + "type Symbol = {symbol_type};", + symbol_type = symbol_type, + ); + rust!(self.out, "type Success = {t};", t = start_type); + rust!(self.out, "type StateIndex = {t};", t = state_type); + rust!(self.out, "type Action = {t};", t = state_type); + rust!(self.out, "type ReduceIndex = {t};", t = state_type); + rust!(self.out, "type NonterminalIndex = usize;"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!(self.out, "fn start_location(&self) -> Self::Location {{"); + rust!(self.out, " Default::default()"); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!( + self.out, + "fn token_to_index(&self, token: &Self::Token) -> Option {{" + ); + rust!( + self.out, + "{p}token_to_integer(token, {phantom})", + p = self.prefix, + phantom = phantom_data_expr, + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!( + self.out, + "fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{", + state_type = state_type + ); + rust!( + self.out, + "{p}ACTION[((state * {num_term}) as usize) + integer]", + p = self.prefix, + num_term = self.grammar.terminals.all.len(), + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!( + self.out, + "fn error_action(&self, state: {state_type}) -> {state_type} {{", + state_type = state_type, + ); + rust!( + self.out, + "{p}ACTION[((state * {num_term}) as usize) + {num_term}]", + p = self.prefix, + num_term = self.grammar.terminals.all.len(), + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!( + self.out, + "fn eof_action(&self, state: {state_type}) -> {state_type} {{", + state_type = state_type, + ); + rust!(self.out, "{p}EOF_ACTION[state as usize]", p = self.prefix,); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!( + self.out, + "fn goto(&self, state: {state_type}, nt: usize) -> {state_type} {{", + state_type = state_type, + ); + rust!( + self.out, + "{p}GOTO[(state * {num_non_term}) as usize + nt] - 1", + p = self.prefix, + num_non_term = self.grammar.nonterminals.len(), + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!( + self.out, + "fn token_to_symbol(&self, token_index: usize, token: Self::Token) -> Self::Symbol {{" + ); + rust!( + self.out, + "{p}token_to_symbol(token_index, token, {phantom})", + p = self.prefix, + phantom = phantom_data_expr, + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!( + self.out, + "fn expected_tokens(&self, state: {state_type}) -> Vec {{", + state_type = state_type, + ); + rust!( + self.out, + "{p}expected_tokens(state as usize)", + p = self.prefix + ); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!(self.out, "fn uses_error_recovery(&self) -> bool {{"); + rust!(self.out, "{}", self.grammar.uses_error_recovery); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "#[inline]"); + rust!(self.out, "fn error_recovery_symbol("); + rust!(self.out, "&self,"); + rust!( + self.out, + "recovery: {p}state_machine::ErrorRecovery,", + p = self.prefix + ); + rust!(self.out, ") -> Self::Symbol {{"); + if self.grammar.uses_error_recovery { + let error_variant = + self.variant_name_for_symbol(&Symbol::Terminal(TerminalString::Error)); + rust!( + self.out, + "{p}Symbol::{e}(recovery)", + p = self.prefix, + e = error_variant + ); + } else { + rust!( + self.out, + "panic!(\"error recovery not enabled for this grammar\")" + ) + } + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!(self.out, "fn reduce("); + rust!(self.out, "&self,"); + rust!(self.out, "action: {state_type},", state_type = state_type); + rust!(self.out, "start_location: Option<&Self::Location>,"); + rust!( + self.out, + "states: &mut Vec<{state_type}>,", + state_type = state_type + ); + rust!( + self.out, + "symbols: &mut Vec<{p}state_machine::SymbolTriple>,", + p = self.prefix, + ); + rust!( + self.out, + ") -> Option<{p}state_machine::ParseResult> {{", + p = self.prefix, + ); + rust!(self.out, "panic!()"); + rust!(self.out, "}}"); + + rust!(self.out, ""); + rust!( + self.out, + "fn simulate_reduce(&self, action: {state_type}) -> {p}state_machine::SimulatedReduce {{", + p = self.prefix, + state_type = state_type, + ); + rust!( + self.out, + "{p}simulate_reduce(action, {phantom})", + p = self.prefix, + phantom = phantom_data_expr, + ); + rust!(self.out, "}}"); + + rust!(self.out, "}}"); + + Ok(()) + } + fn write_value_type_defn(&mut self) -> io::Result<()> { // sometimes some of the variants are not used, particularly // if we are generating multiple parsers from the same file: @@ -372,14 +395,14 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive self.out, "pub enum {}Symbol<{}>", self.prefix, - Sep(", ", &self.custom.symbol_type_params) + Sep(", ", &self.custom.symbol_type_params), ); if !self.custom.symbol_where_clauses.is_empty() { rust!( self.out, " where {}", - Sep(", ", &self.custom.symbol_where_clauses) + Sep(", ", &self.custom.symbol_where_clauses), ); } @@ -422,6 +445,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive .variant_names .insert(Symbol::Nonterminal(nt.clone()), name.clone()); } + rust!(self.out, "}}"); Ok(()) } @@ -766,14 +790,33 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive Ok(()) } - fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> { - rust!( - self.out, - "{p}{integer} = match {p}{lookahead}.1 {{", - integer = integer, - lookahead = lookahead, - p = self.prefix + fn write_token_to_integer_fn(&mut self) -> io::Result<()> { + let token_type = self.types.terminal_token_type(); + + let parameters = vec![ + format!( + "{p}token: &{token_type}", + p = self.prefix, + token_type = token_type, + ), + format!("_: {}", self.phantom_data_type()), + ]; + + try!( + self.out + .fn_header( + &Visibility::Priv, + format!("{p}token_to_integer", p = self.prefix) + ).with_type_parameters(&self.grammar.type_parameters) + .with_where_clauses(&self.grammar.where_clauses) + .with_parameters(parameters) + .with_return_type(format!("Option")) + .emit() ); + rust!(self.out, "{{"); + + rust!(self.out, "match *{p}token {{", p = self.prefix); + for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) { if *terminal == TerminalString::Error { continue; @@ -781,37 +824,78 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive let pattern = self.grammar.pattern(terminal).map(&mut |_| "_"); rust!( self.out, - "{pattern} if true => {index},", + "{pattern} if true => Some({index}),", pattern = pattern, index = index ); } - rust!(self.out, "_ => {{"); + rust!(self.out, "_ => None,"); + + rust!(self.out, "}}"); + rust!(self.out, "}}"); + + Ok(()) + } + + fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> { + let phantom_data_expr = self.phantom_data_expr(); + + rust!( + self.out, + "{p}{integer} = match {p}token_to_integer(&{p}{lookahead}.1, {pde}) {{", + p = self.prefix, + integer = integer, + lookahead = lookahead, + pde = phantom_data_expr, + ); + rust!(self.out, "Some({p}i) => {p}i,", p = self.prefix,); + rust!(self.out, "None => {{",); let prefix = self.prefix; - try!(self.let_unrecognized_token_error( + self.let_unrecognized_token_error( "error", - &format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix) - )); - rust!(self.out, "return Err({p}error);", p = self.prefix); + &format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix), + )?; + rust!(self.out, "return Err({p}error);", p = prefix); rust!(self.out, "}}"); rust!(self.out, "}};"); Ok(()) } - fn token_to_symbol(&mut self) -> io::Result<()> { - rust!( - self.out, - "let {}symbol = match {}integer {{", - self.prefix, - self.prefix + fn write_token_to_symbol_fn(&mut self) -> io::Result<()> { + let symbol_type = self.symbol_type(); + let token_type = self.types.terminal_token_type(); + + let parameters = vec![ + format!("{p}token_index: usize", p = self.prefix,), + format!( + "{p}token: {token_type}", + p = self.prefix, + token_type = token_type, + ), + format!("_: {}", self.phantom_data_type()), + ]; + + try!( + self.out + .fn_header( + &Visibility::Priv, + format!("{p}token_to_symbol", p = self.prefix), + ).with_type_parameters(&self.grammar.type_parameters) + .with_where_clauses(&self.grammar.where_clauses) + .with_parameters(parameters) + .with_return_type(symbol_type) + .emit() ); + rust!(self.out, "{{"); + + rust!(self.out, "match {p}token_index {{", p = self.prefix,); for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) { if *terminal == TerminalString::Error { continue; } - rust!(self.out, "{} => match {}lookahead.1 {{", index, self.prefix); + rust!(self.out, "{} => match {}token {{", index, self.prefix); let mut pattern_names = vec![]; let pattern = self.grammar.pattern(terminal).map(&mut |_| { @@ -829,11 +913,11 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive let variant_name = self.variant_name_for_symbol(&Symbol::Terminal(terminal.clone())); rust!( self.out, - "{} => {}Symbol::{}(({})),", - pattern, - self.prefix, - variant_name, - pattern_names.join(", ") + "{pattern} => {p}Symbol::{variant_name}(({pattern_names})),", + pattern = pattern, + p = self.prefix, + variant_name = variant_name, + pattern_names = pattern_names.join(", "), ); rust!(self.out, "_ => unreachable!(),"); rust!(self.out, "}},"); @@ -841,7 +925,20 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive rust!(self.out, "_ => unreachable!(),"); - rust!(self.out, "}};"); + rust!(self.out, "}}"); + rust!(self.out, "}}"); + Ok(()) + } + + fn token_to_symbol(&mut self) -> io::Result<()> { + let phantom_data_expr = self.phantom_data_expr(); + + rust!( + self.out, + "let {p}symbol = {p}token_to_symbol({p}integer, {p}lookahead.1, {pde});", + p = self.prefix, + pde = phantom_data_expr, + ); Ok(()) } @@ -1164,7 +1261,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive Ok(()) } - fn variant_name_for_symbol(&mut self, s: &Symbol) -> String { + fn variant_name_for_symbol(&self, s: &Symbol) -> String { self.custom.variant_names[s].clone() } @@ -1886,6 +1983,96 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive Ok(()) } + fn write_simulate_reduce_fn(&mut self) -> io::Result<()> { + let state_type = self.custom.state_type; + + let parameters = vec![ + format!( + "{p}reduce_index: {state_type}", + p = self.prefix, + state_type = state_type, + ), + format!("_: {}", self.phantom_data_type()), + ]; + + try!( + self.out + .fn_header( + &Visibility::Priv, + format!("{p}simulate_reduce", p = self.prefix), + ).with_type_parameters(&self.grammar.type_parameters) + .with_where_clauses(&self.grammar.where_clauses) + .with_parameters(parameters) + .with_return_type(format!( + "{p}state_machine::SimulatedReduce<{p}StateMachine<{gtp}>>", + p = self.prefix, + gtp = Sep(", ", &self.grammar.type_parameters), + )).emit() + ); + rust!(self.out, "{{"); + + rust!(self.out, "match {p}reduce_index {{", p = self.prefix,); + for (production, index) in self + .grammar + .nonterminals + .values() + .flat_map(|nt| &nt.productions) + .zip(0..) + { + if Tls::session().emit_comments { + rust!(self.out, "// simulate {:?}", production); + } + + // if we just reduced the start symbol, that is also an accept criteria + if production.nonterminal == self.start_symbol { + rust!( + self.out, + "{index} => {p}state_machine::SimulatedReduce::Accept,", + index = index, + p = self.prefix, + ); + } else { + let num_symbols = production.symbols.len(); + let nt = self + .custom + .all_nonterminals + .iter() + .position(|x| *x == production.nonterminal) + .unwrap(); + rust!(self.out, "{} => {{", index); + if DEBUG_PRINT { + rust!( + self.out, + "println!(r##\"accepts: simulating {:?}\"##);", + production + ); + } + rust!( + self.out, + "{p}state_machine::SimulatedReduce::Reduce {{", + p = self.prefix, + ); + rust!( + self.out, + "states_to_pop: {num_symbols},", + num_symbols = num_symbols, + ); + rust!(self.out, "nonterminal_produced: {nt},", nt = nt); + rust!(self.out, "}}"); + rust!(self.out, "}}"); + } + } + rust!( + self.out, + "_ => panic!(\"invalid reduction index {{}}\", {}reduce_index)", + self.prefix, + ); + rust!(self.out, "}}"); // end match + + rust!(self.out, "}}"); + Ok(()) + } + /// The `accepts` function /// /// ```ignore @@ -1918,6 +2105,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive return Ok(()); } + let phantom_data_expr = self.phantom_data_expr(); let actions_per_state = self.grammar.terminals.all.len(); let parameters = vec![ format!( @@ -2020,54 +2208,23 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive // effect on the state stack. rust!( self.out, - "let ({p}to_pop, {p}nt) = match -{p}action {{", - p = self.prefix + "let ({p}to_pop, {p}nt) = match {p}simulate_reduce(-({p}action + 1), {pde}) {{", + p = self.prefix, + pde = phantom_data_expr, ); - for (production, index) in self - .grammar - .nonterminals - .values() - .flat_map(|nt| &nt.productions) - .zip(1..) - { - if Tls::session().emit_comments { - rust!(self.out, "// simulate {:?}", production); - } - - // if we just reduced the start symbol, that is also an accept criteria - if production.nonterminal == self.start_symbol { - rust!(self.out, "{} => return true,", index); - } else { - let num_symbols = production.symbols.len(); - let nt = self - .custom - .all_nonterminals - .iter() - .position(|x| *x == production.nonterminal) - .unwrap(); - rust!(self.out, "{} => {{", index); - if DEBUG_PRINT { - rust!( - self.out, - "println!(r##\"accepts: simulating {:?}\"##);", - production - ); - } - rust!( - self.out, - "({num_symbols}, {nt})", - num_symbols = num_symbols, - nt = nt - ); - rust!(self.out, "}}"); - } - } rust!( self.out, - "_ => panic!(\"invalid action code {{}}\", {}action)", - self.prefix + "{p}state_machine::SimulatedReduce::Reduce {{", + p = self.prefix, ); - rust!(self.out, "}};"); // end match + rust!(self.out, "states_to_pop, nonterminal_produced",); + rust!(self.out, "}} => (states_to_pop, nonterminal_produced),",); + rust!( + self.out, + "{p}state_machine::SimulatedReduce::Accept => return true,", + p = self.prefix, + ); + rust!(self.out, "}};"); rust!(self.out, "{p}states_len -= {p}to_pop;", p = self.prefix); rust!( @@ -2110,9 +2267,9 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive fn symbol_type(&self) -> String { format!( - "{}Symbol<{}>", - self.prefix, - Sep(", ", &self.custom.symbol_type_params) + "{p}Symbol<{stp}>", + p = self.prefix, + stp = Sep(", ", &self.custom.symbol_type_params), ) }