generate state machine definition (but do not use to drive parsing)

This version, however, requires all lifetimes to be explicit. Suboptimal.
Niko Matsakis, 2018-07-18 07:43:19 +03:00 (committed by Markus Westerlind)
parent d0d6cc6bbb
commit 080611a725
8 changed files with 505 additions and 320 deletions
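
As background for the "all lifetimes explicit" caveat: the generated StateMachine struct declared in `write_machine_definition` below stores the grammar's parameters, which for these test grammars are `&mut` references, and that is presumably why the grammar headers currently have to spell out a lifetime and an outlives bound. A minimal, hypothetical Rust sketch (names invented, not LALRPOP output) of the same constraint:

```
// Hypothetical illustration mirroring the FIXME'd
// `grammar<'a, P>(callbacks: &'a mut P) where P: ParseCallbacks, P: 'a;`
// declarations in the hunks below: the lifetime and the `P: 'a` bound are
// written out explicitly even though one would like them to be inferred.
struct CallbackHolder<'a, P: 'a> {
    callbacks: &'a mut P,
}

impl<'a, P> CallbackHolder<'a, P> {
    fn new(callbacks: &'a mut P) -> Self {
        CallbackHolder { callbacks }
    }
}
```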

View File

@ -5,6 +5,7 @@ fn main() {
.emit_comments(true)
.force_build(true)
.unit_test()
.log_debug()
.process_current_dir()
.unwrap();
}

View File

@ -1,7 +1,8 @@
use std::str::FromStr;
use associated_types_lib::ParseCallbacks;
grammar<P>(callbacks: &mut P) where P: ParseCallbacks;
// FIXME: the 'a shouldn't be needed
grammar<'a, P>(callbacks: &'a mut P) where P: ParseCallbacks, P: 'a;
pub Term: P::Term = {
<n:Num> => n.into(),

View File

@ -1,6 +1,7 @@
use std::str::FromStr;
grammar<F>(logger: &mut F) where F: for<'a> FnMut(&'a str);
// FIXME: 'logger shouldn't be needed
grammar<'logger, F>(logger: &'logger mut F) where F: for<'a> FnMut(&'a str) + 'logger;
pub Term: i32 = {
<n:Num> => {

View File

@ -147,9 +147,14 @@ pub trait ParserAction<D: ParserDefinition>: Copy + Clone + Debug {
fn is_error(self) -> bool;
}
pub struct SimulatedReduce<D: ParserDefinition> {
pub enum SimulatedReduce<D: ParserDefinition> {
Reduce {
states_to_pop: usize,
nonterminal_produced: D::NonterminalIndex,
},
// This reduce is the "start" fn, so the parse is done.
Accept,
}
// These aliases are an elaborate hack to get around
@ -492,15 +497,22 @@ where
// If we encounter a reduce action, we need to simulate its
// effect on the state stack.
if let Some(reduce_action) = action.as_reduce() {
let SimulatedReduce {
match self.definition.simulate_reduce(reduce_action) {
SimulatedReduce::Reduce {
states_to_pop,
nonterminal_produced,
} = self.definition.simulate_reduce(reduce_action);
} => {
states_len -= states_to_pop;
states.truncate(states_len);
let top = states[states_len - 1];
let next_state = self.definition.goto(top, nonterminal_produced);
states.push(next_state);
}
SimulatedReduce::Accept => {
return true;
}
}
} else {
// If we encounter a shift action, we DO accept.
assert!(action.is_shift());

View File

@ -152,24 +152,20 @@ impl<T: FreeVariables> FreeVariables for parse_tree::TypeBound<T> {
parse_tree::TypeBound::Lifetime(l) => free_lifetime(type_parameters, l),
parse_tree::TypeBound::Fn {
forall,
path,
path: _,
parameters,
ret,
} => path
.free_variables(type_parameters)
} => parameters.free_variables(type_parameters)
.into_iter()
.chain(parameters.free_variables(type_parameters))
.chain(ret.free_variables(type_parameters))
.filter(|tp| !forall.contains(tp))
.collect(),
parse_tree::TypeBound::Trait {
forall,
path,
path: _,
parameters,
} => path
.free_variables(type_parameters)
} => parameters.free_variables(type_parameters)
.into_iter()
.chain(parameters.free_variables(type_parameters))
.filter(|tp| !forall.contains(tp))
.collect(),
}

View File

@ -14,7 +14,7 @@ use util::Sep;
// These concepts we re-use wholesale
pub use grammar::parse_tree::{
Annotation, InternToken, Lifetime, NonterminalString, Path, Span, TerminalLiteral,
TerminalString, TypeBound, TypeBoundParameter, TypeParameter, Visibility,
TerminalString, TypeBound, TypeParameter, Visibility,
};
#[derive(Clone, Debug)]

View File

@ -347,10 +347,17 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> {
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_type(&self) -> String {
format!(
"::std::marker::PhantomData<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
)
let phantom_bits: Vec<_> = self
.grammar
.type_parameters
.iter()
.map(|tp| match *tp {
TypeParameter::Lifetime(ref l) => format!("&{} ()", l),
TypeParameter::Id(ref id) => id.to_string(),
})
.collect();
format!("::std::marker::PhantomData<({})>", Sep(", ", &phantom_bits),)
}
/// Returns expression that captures the user-declared type
@ -358,9 +365,19 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> {
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_expr(&self) -> String {
let phantom_bits: Vec<_> = self
.grammar
.type_parameters
.iter()
.map(|tp| match *tp {
TypeParameter::Lifetime(_) => format!("&()"),
TypeParameter::Id(ref id) => id.to_string(),
})
.collect();
format!(
"::std::marker::PhantomData::<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
Sep(", ", &phantom_bits),
)
}
}
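
For concreteness, a hedged sketch of what these two helpers now produce for a hypothetical grammar with type parameters `'input` and `T`: in type position the lifetime is kept alive as `&'input ()` so every parameter stays constrained, while in expression position it can be left as `&()` for inference.

```
// Illustrative only; 'input and T are made-up grammar type parameters.
struct Phantoms<'input, T> {
    // phantom_data_type() would render this type as
    // "::std::marker::PhantomData<(&'input (), T)>"
    phantom: ::std::marker::PhantomData<(&'input (), T)>,
}

fn make_phantoms<'input, T>() -> Phantoms<'input, T> {
    Phantoms {
        // phantom_data_expr() renders the value with the lifetime elided:
        // "::std::marker::PhantomData::<(&(), T)>"
        phantom: ::std::marker::PhantomData::<(&(), T)>,
    }
}
```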

View File

@ -34,222 +34,6 @@ pub fn compile<'grammar, W: Write>(
table_driven.write()
}
// We create three parse tables:
//
// - `ACTION[state * num_states + terminal]: i32`: given a state and next token,
// yields an integer indicating whether to shift/reduce (see below)
// - `EOF_ACTION[state]: i32`: as above, but for the EOF token
// - `GOTO[state * num_states + nonterminal]: i32`: index + 1 of state to jump to when given
// nonterminal is pushed (no error is possible)
//
// For the `ACTION` and `EOF_ACTION` tables, the value is an `i32` and
// its interpretation varies depending on whether it is positive or
// negative:
//
// - if zero, parse error.
// - if a positive integer (not zero), it is the next state to shift to.
// - if a negative integer (not zero), it is the index of a reduction
// action to execute (actually index + 1).
//
// We maintain two stacks: one is a stack of state indexes (each an
// u32). The other is a stack of values and spans: `(L, T, L)`. `L` is
// the location type and represents the start/end span. `T` is the
// value of the symbol. The type `T` is an `enum` that we synthesize
// which contains a variant for all the possibilities:
//
// ```
// enum Value<> {
// // One variant for each terminal:
// Term1(Ty1),
// ...
// TermN(TyN),
//
// // One variant for each nonterminal:
// Nt1(Ty1),
// ...
// NtN(TyN),
// }
// ```
//
// The action parser function looks like this (pseudo-code):
//
// ```
// fn parse_fn<TOKENS>(tokens: TOKENS) -> Result<T, Error>
// where TOKENS: Iterator<Item=Result<(Location, Token, Location), Error>>
// {
// let mut states = vec![0]; // initial state is zero
// let mut symbols = vec![];
// 'shift: loop {
// // Code to shift the next symbol and determine which terminal
// // it is; emitted by `shift_symbol()`.
// let lookahead = match tokens.next() {
// Some(Ok(l)) => l,
// None => break 'shift,
// Some(Err(e)) => return Err(e),
// };
// let integer = match lookahead {
// (_, PatternForTerminal0(...), _) => 0,
// ...
// };
//
// // Code to process next symbol.
// 'inner: loop {
// let symbol = match lookahead {
// (l, PatternForTerminal0(...), r) => {
// (l, Value::VariantForTerminal0(...), r),
// }
// ...
// };
// let state = *states.last().unwrap() as usize;
// let action = ACTION[state * NUM_STATES + integer];
// if action > 0 { // shift
// states.push(action - 1);
// symbols.push(symbol);
// continue 'shift;
// } else if action < 0 { // reduce
// if let Some(r) = reduce(action, Some(&lookahead.0), &mut states, &mut symbols) {
// // Give errors from within grammar a higher priority
// if r.is_err() {
// return r;
// }
// return Err(lalrpop_util::ParseError::ExtraToken { token: lookahead });
// }
// } else {
// // Error recovery code: emitted by `try_error_recovery`
// let mut err_lookahead = Some(lookahead);
// let mut err_integer = Some(integer);
// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location,
// &mut err_lookahead, &mut err_integer) {
// Err(e) => return e,
// Ok(Some(v)) => return Ok(v),
// Ok(None) => { }
// }
// match (err_lookahead, err_integer) {
// (Some(l), Some(i)) => {
// lookahead = l;
// integer = i;
// continue 'inner;
// }
// _ => break 'shift;
// }
// }
// }
// }
//
// // Process EOF
// while let Some(state) = self.states.pop() {
// let action = EOF_ACTION[state * NUM_STATES];
// if action < 0 { // reduce
// try!(reduce(action, None, &mut states, &mut symbols));
// } else {
// let mut err_lookahead = None;
// let mut err_integer = None;
// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location,
// &mut err_lookahead, &mut err_integer) {
// Err(e) => return e,
// Ok(Some(v)) => return Ok(v),
// Ok(None) => { }
// }
// }
// }
// }
//
// // generated by `emit_reduce_actions()`
// fn reduce(action: i32, lookahead_start: Option<&L>,
// states: &mut Vec<i32>, symbols: &mut Vec<(L, Symbol, L)>)
// -> Option<Result<..>> {
// let nonterminal = match -action {
// 0 => {
// // Execute reduce action 0 to produce nonterminal N, popping from stacks etc
// // (generated by `emit_reduce_action()`). If this is a fallible action,
// // it may return `Some(Err)`, and if this is a reduce of the start NT,
// // it may return `Some(Ok)`.
// states.pop(); // however many times
// symbols.pop(); // however many times
// let data = action_fn0(...);
// symbols.push((l, Value::VariantForNonterminalN(data), r));
// N
// }
// ...
// };
// let state = *states.last().unwrap();
// let next_state = GOTO[state * NUM_STATES + nonterminal] - 1;
// state_stack.push(next_state);
// None
// }
//
// generated by `write_error_recovery_fn`
// fn error_recovery(...) {
// let mut dropped_tokens = vec![];
//
// // First, reduce as long as we can with the `!` token as lookahead
// loop {
// let state = *states.last().unwrap() as usize;
// let action = ACTION[(state + 1) * ACTIONS_PER_STATE - 1];
// if action >= 0 {
// break;
// }
// if let Some(r) = reduce(action, None, &mut states, &mut symbols) {
// return r;
// }
// }
//
// let top0;
// 'find_state: loop {
// // See if there is a state that can shift `!` token. If so,
// // break.
// for top in (0..states.len()).rev() {
// let state = states[top];
// let action = ACTION[state * ACTIONS_PER_STATE + 1];
// if action <= 0 { continue; }
// let error_state = action - 1;
// if accepts(error_state, &states[..top+1], *opt_integer) {
// top0 = top;
// break 'find_state;
// }
// }
//
// // Else, drop a token from the input and try again.
// 'eof: loop {
// match opt_lookahead.take() {
// None => {
// // No more tokens to drop
// return Err(...);
// }
// Some(mut lookahead) => {
// dropped_tokens.push(lookahead);
// next_token()
// opt_lookahead = Some(match tokens.next() {
// Some(Ok(l)) => l,
// None => break 'eof,
// Some(Err(e)) => return Err(e),
// });
// opt_integer = Some(match lookahead {
// (_, PatternForTerminal0(...), _) => 0,
// ...
// });
// continue 'find_state;
// }
// }
// }
// opt_lookahead = None;
// opt_integer = None;
// }
//
// let top = top0;
// let start = /* figure out "start" of error */;
// let end = /* figure out "end" of error */;
// states.truncate(top + 1);
// symbols.truncate(top);
// let recover_state = states[top];
// let error_state = ACTION[recover_state * ACTIONS_PER_STATE + 1] - 1;
// states.push(error_state);
// let recovery = ErrorRecovery { dropped_tokens, ... };
// symbols.push((start, Symbol::Termerror(recovery), end));
// Ok(None)
// }
// ```
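
The sign convention described in the removed comment above is easy to misread, so here is a small, hedged decoder that restates it in plain Rust (not part of the generated parser); the reduce-index arithmetic matches the `-(action + 1)` adjustment used in the rewritten `accepts` code further down in this diff.

```
// Restatement of the ACTION/EOF_ACTION encoding:
//   a == 0 -> parse error
//   a >  0 -> shift to state a - 1
//   a <  0 -> run the reduction at 0-based index -(a + 1)
//             (the "index + 1" mentioned above, negated)
#[derive(Debug)]
enum DecodedAction {
    Error,
    Shift { next_state: usize },
    Reduce { reduce_index: usize },
}

fn decode_action(a: i32) -> DecodedAction {
    if a > 0 {
        DecodedAction::Shift { next_state: (a - 1) as usize }
    } else if a < 0 {
        DecodedAction::Reduce { reduce_index: (-(a + 1)) as usize }
    } else {
        DecodedAction::Error
    }
}
```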
enum Comment<'a, T> {
Goto(T, usize),
Error(T),
@ -354,6 +138,10 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
self.write_parse_mod(|this| {
try!(this.write_value_type_defn());
try!(this.write_parse_table());
try!(this.write_machine_definition());
try!(this.write_token_to_integer_fn());
try!(this.write_token_to_symbol_fn());
try!(this.write_simulate_reduce_fn());
try!(this.write_parser_fn());
try!(this.write_error_recovery_fn());
try!(this.write_accepts_fn());
@ -364,6 +152,241 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
})
}
fn write_machine_definition(&mut self) -> io::Result<()> {
let grammar_type_params = Sep(", ", &self.grammar.type_parameters);
let grammar_where_clauses = Sep(", ", &self.grammar.where_clauses);
// let parse_error_type = self.types.parse_error_type();
let error_type = self.types.error_type();
let token_type = self.types.terminal_token_type();
// let spanned_symbol_type = self.spanned_symbol_type();
// let triple_type = self.types.triple_type();
let loc_type = self.types.terminal_loc_type();
// let actions_per_state = self.grammar.terminals.all.len();
let start_type = self.types.nonterminal_type(&self.start_symbol);
let state_type = self.custom.state_type;
let symbol_type = self.symbol_type();
let phantom_data_type = self.phantom_data_type();
let phantom_data_expr = self.phantom_data_expr();
rust!(
self.out,
"pub struct {p}StateMachine<{gtp}>",
p = self.prefix,
gtp = grammar_type_params
);
rust!(self.out, "where {gwc}", gwc = grammar_where_clauses);
rust!(self.out, "{{");
for param in &self.grammar.parameters {
rust!(self.out, "{name}: {ty},", name = param.name, ty = param.ty,);
}
rust!(
self.out,
"{p}phantom: {phantom},",
p = self.prefix,
phantom = phantom_data_type,
);
rust!(self.out, "}}");
rust!(
self.out,
"impl<{gtp}> {p}state_machine::ParserDefinition for {p}StateMachine<{gtp}>",
p = self.prefix,
gtp = grammar_type_params,
);
rust!(self.out, "where {gwc}", gwc = grammar_where_clauses);
rust!(self.out, "{{");
rust!(self.out, "type Location = {t};", t = loc_type);
rust!(self.out, "type Error = {t};", t = error_type);
rust!(self.out, "type Token = {t};", t = token_type);
rust!(self.out, "type TokenIndex = usize;");
rust!(
self.out,
"type Symbol = {symbol_type};",
symbol_type = symbol_type,
);
rust!(self.out, "type Success = {t};", t = start_type);
rust!(self.out, "type StateIndex = {t};", t = state_type);
rust!(self.out, "type Action = {t};", t = state_type);
rust!(self.out, "type ReduceIndex = {t};", t = state_type);
rust!(self.out, "type NonterminalIndex = usize;");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn start_location(&self) -> Self::Location {{");
rust!(self.out, " Default::default()");
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn token_to_index(&self, token: &Self::Token) -> Option<usize> {{"
);
rust!(
self.out,
"{p}token_to_integer(token, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{",
state_type = state_type
);
rust!(
self.out,
"{p}ACTION[((state * {num_term}) as usize) + integer]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn error_action(&self, state: {state_type}) -> {state_type} {{",
state_type = state_type,
);
rust!(
self.out,
"{p}ACTION[((state * {num_term}) as usize) + {num_term}]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn eof_action(&self, state: {state_type}) -> {state_type} {{",
state_type = state_type,
);
rust!(self.out, "{p}EOF_ACTION[state as usize]", p = self.prefix,);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn goto(&self, state: {state_type}, nt: usize) -> {state_type} {{",
state_type = state_type,
);
rust!(
self.out,
"{p}GOTO[(state * {num_non_term}) as usize + nt] - 1",
p = self.prefix,
num_non_term = self.grammar.nonterminals.len(),
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(
self.out,
"fn token_to_symbol(&self, token_index: usize, token: Self::Token) -> Self::Symbol {{"
);
rust!(
self.out,
"{p}token_to_symbol(token_index, token, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(
self.out,
"fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{",
state_type = state_type,
);
rust!(
self.out,
"{p}expected_tokens(state as usize)",
p = self.prefix
);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn uses_error_recovery(&self) -> bool {{");
rust!(self.out, "{}", self.grammar.uses_error_recovery);
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn error_recovery_symbol(");
rust!(self.out, "&self,");
rust!(
self.out,
"recovery: {p}state_machine::ErrorRecovery<Self>,",
p = self.prefix
);
rust!(self.out, ") -> Self::Symbol {{");
if self.grammar.uses_error_recovery {
let error_variant =
self.variant_name_for_symbol(&Symbol::Terminal(TerminalString::Error));
rust!(
self.out,
"{p}Symbol::{e}(recovery)",
p = self.prefix,
e = error_variant
);
} else {
rust!(
self.out,
"panic!(\"error recovery not enabled for this grammar\")"
)
}
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "fn reduce(");
rust!(self.out, "&self,");
rust!(self.out, "action: {state_type},", state_type = state_type);
rust!(self.out, "start_location: Option<&Self::Location>,");
rust!(
self.out,
"states: &mut Vec<{state_type}>,",
state_type = state_type
);
rust!(
self.out,
"symbols: &mut Vec<{p}state_machine::SymbolTriple<Self>>,",
p = self.prefix,
);
rust!(
self.out,
") -> Option<{p}state_machine::ParseResult<Self>> {{",
p = self.prefix,
);
rust!(self.out, "panic!()");
rust!(self.out, "}}");
rust!(self.out, "");
rust!(
self.out,
"fn simulate_reduce(&self, action: {state_type}) -> {p}state_machine::SimulatedReduce<Self> {{",
p = self.prefix,
state_type = state_type,
);
rust!(
self.out,
"{p}simulate_reduce(action, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");
rust!(self.out, "}}");
Ok(())
}
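
Reading the emitted code back out of these `rust!` calls is tedious, so here is a cut-down, self-contained toy (invented names and tables, and only a slice of the real `ParserDefinition` trait) showing the pattern being generated: a struct holding the grammar parameters plus a `PhantomData`, whose impl indexes the static tables and delegates everything else to free functions such as the token-to-integer, token-to-symbol, and simulate-reduce helpers added below.

```
// Toy stand-in for the generated machine; the real output carries the
// generator's prefix, the grammar's generics and where-clauses, and the
// full trait.
trait ParserDefinitionSlice {
    fn action(&self, state: i8, terminal: usize) -> i8;
    fn eof_action(&self, state: i8) -> i8;
    fn goto(&self, state: i8, nonterminal: usize) -> i8;
}

const NUM_TERMINALS: usize = 3; // grammar.terminals.all.len()
const NUM_NONTERMINALS: usize = 2; // grammar.nonterminals.len()

// Made-up tables; one row per state.
static ACTION: [i8; 2 * NUM_TERMINALS] = [2, -1, 0, 0, 3, -2];
static EOF_ACTION: [i8; 2] = [0, -1];
static GOTO: [i8; 2 * NUM_NONTERMINALS] = [2, 0, 0, 1];

struct ToyStateMachine {
    phantom: ::std::marker::PhantomData<()>,
}

impl ParserDefinitionSlice for ToyStateMachine {
    #[inline]
    fn action(&self, state: i8, terminal: usize) -> i8 {
        ACTION[(state as usize) * NUM_TERMINALS + terminal]
    }
    #[inline]
    fn eof_action(&self, state: i8) -> i8 {
        EOF_ACTION[state as usize]
    }
    #[inline]
    fn goto(&self, state: i8, nonterminal: usize) -> i8 {
        GOTO[(state as usize) * NUM_NONTERMINALS + nonterminal] - 1
    }
}
```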
fn write_value_type_defn(&mut self) -> io::Result<()> {
// sometimes some of the variants are not used, particularly
// if we are generating multiple parsers from the same file:
@ -372,14 +395,14 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
self.out,
"pub enum {}Symbol<{}>",
self.prefix,
Sep(", ", &self.custom.symbol_type_params)
Sep(", ", &self.custom.symbol_type_params),
);
if !self.custom.symbol_where_clauses.is_empty() {
rust!(
self.out,
" where {}",
Sep(", ", &self.custom.symbol_where_clauses)
Sep(", ", &self.custom.symbol_where_clauses),
);
}
@ -422,6 +445,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
.variant_names
.insert(Symbol::Nonterminal(nt.clone()), name.clone());
}
rust!(self.out, "}}");
Ok(())
}
@ -766,14 +790,33 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}
fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> {
rust!(
self.out,
"{p}{integer} = match {p}{lookahead}.1 {{",
integer = integer,
lookahead = lookahead,
p = self.prefix
fn write_token_to_integer_fn(&mut self) -> io::Result<()> {
let token_type = self.types.terminal_token_type();
let parameters = vec![
format!(
"{p}token: &{token_type}",
p = self.prefix,
token_type = token_type,
),
format!("_: {}", self.phantom_data_type()),
];
try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}token_to_integer", p = self.prefix)
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(format!("Option<usize>"))
.emit()
);
rust!(self.out, "{{");
rust!(self.out, "match *{p}token {{", p = self.prefix);
for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) {
if *terminal == TerminalString::Error {
continue;
@ -781,37 +824,78 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
let pattern = self.grammar.pattern(terminal).map(&mut |_| "_");
rust!(
self.out,
"{pattern} if true => {index},",
"{pattern} if true => Some({index}),",
pattern = pattern,
index = index
);
}
rust!(self.out, "_ => {{");
rust!(self.out, "_ => None,");
rust!(self.out, "}}");
rust!(self.out, "}}");
Ok(())
}
fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> {
let phantom_data_expr = self.phantom_data_expr();
rust!(
self.out,
"{p}{integer} = match {p}token_to_integer(&{p}{lookahead}.1, {pde}) {{",
p = self.prefix,
integer = integer,
lookahead = lookahead,
pde = phantom_data_expr,
);
rust!(self.out, "Some({p}i) => {p}i,", p = self.prefix,);
rust!(self.out, "None => {{",);
let prefix = self.prefix;
try!(self.let_unrecognized_token_error(
self.let_unrecognized_token_error(
"error",
&format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix)
));
rust!(self.out, "return Err({p}error);", p = self.prefix);
&format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix),
)?;
rust!(self.out, "return Err({p}error);", p = prefix);
rust!(self.out, "}}");
rust!(self.out, "}};");
Ok(())
}
fn token_to_symbol(&mut self) -> io::Result<()> {
rust!(
self.out,
"let {}symbol = match {}integer {{",
self.prefix,
self.prefix
fn write_token_to_symbol_fn(&mut self) -> io::Result<()> {
let symbol_type = self.symbol_type();
let token_type = self.types.terminal_token_type();
let parameters = vec![
format!("{p}token_index: usize", p = self.prefix,),
format!(
"{p}token: {token_type}",
p = self.prefix,
token_type = token_type,
),
format!("_: {}", self.phantom_data_type()),
];
try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}token_to_symbol", p = self.prefix),
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(symbol_type)
.emit()
);
rust!(self.out, "{{");
rust!(self.out, "match {p}token_index {{", p = self.prefix,);
for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) {
if *terminal == TerminalString::Error {
continue;
}
rust!(self.out, "{} => match {}lookahead.1 {{", index, self.prefix);
rust!(self.out, "{} => match {}token {{", index, self.prefix);
let mut pattern_names = vec![];
let pattern = self.grammar.pattern(terminal).map(&mut |_| {
@ -829,11 +913,11 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
let variant_name = self.variant_name_for_symbol(&Symbol::Terminal(terminal.clone()));
rust!(
self.out,
"{} => {}Symbol::{}(({})),",
pattern,
self.prefix,
variant_name,
pattern_names.join(", ")
"{pattern} => {p}Symbol::{variant_name}(({pattern_names})),",
pattern = pattern,
p = self.prefix,
variant_name = variant_name,
pattern_names = pattern_names.join(", "),
);
rust!(self.out, "_ => unreachable!(),");
rust!(self.out, "}},");
@ -841,7 +925,20 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
rust!(self.out, "_ => unreachable!(),");
rust!(self.out, "}};");
rust!(self.out, "}}");
rust!(self.out, "}}");
Ok(())
}
fn token_to_symbol(&mut self) -> io::Result<()> {
let phantom_data_expr = self.phantom_data_expr();
rust!(
self.out,
"let {p}symbol = {p}token_to_symbol({p}integer, {p}lookahead.1, {pde});",
p = self.prefix,
pde = phantom_data_expr,
);
Ok(())
}
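
Before this commit both of these conversions were open-coded inside the parser loop; splitting them out lets the `ParserDefinition` impl above reuse them. A hedged toy of the shape of the generated pair, with made-up `Tok`/`Symbol` types:

```
// Hypothetical token and symbol enums standing in for the generated ones.
enum Tok {
    Num(i32),
    Id(String),
    Unknown,
}

enum Symbol {
    TermNum(i32),
    TermId(String),
}

// Shape of the generated token-to-integer fn: each known terminal pattern
// maps to its index in the terminals table; anything else is None.
fn token_to_integer(token: &Tok) -> Option<usize> {
    match *token {
        Tok::Num(_) => Some(0),
        Tok::Id(_) => Some(1),
        _ => None,
    }
}

// Shape of the generated token-to-symbol fn: dispatch on the precomputed
// index, then move the token's fields into the matching Symbol variant.
fn token_to_symbol(token_index: usize, token: Tok) -> Symbol {
    match token_index {
        0 => match token {
            Tok::Num(n) => Symbol::TermNum(n),
            _ => unreachable!(),
        },
        1 => match token {
            Tok::Id(s) => Symbol::TermId(s),
            _ => unreachable!(),
        },
        _ => unreachable!(),
    }
}
```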
@ -1164,7 +1261,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}
fn variant_name_for_symbol(&mut self, s: &Symbol) -> String {
fn variant_name_for_symbol(&self, s: &Symbol) -> String {
self.custom.variant_names[s].clone()
}
@ -1886,6 +1983,96 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}
fn write_simulate_reduce_fn(&mut self) -> io::Result<()> {
let state_type = self.custom.state_type;
let parameters = vec![
format!(
"{p}reduce_index: {state_type}",
p = self.prefix,
state_type = state_type,
),
format!("_: {}", self.phantom_data_type()),
];
try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}simulate_reduce", p = self.prefix),
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(format!(
"{p}state_machine::SimulatedReduce<{p}StateMachine<{gtp}>>",
p = self.prefix,
gtp = Sep(", ", &self.grammar.type_parameters),
)).emit()
);
rust!(self.out, "{{");
rust!(self.out, "match {p}reduce_index {{", p = self.prefix,);
for (production, index) in self
.grammar
.nonterminals
.values()
.flat_map(|nt| &nt.productions)
.zip(0..)
{
if Tls::session().emit_comments {
rust!(self.out, "// simulate {:?}", production);
}
// if we just reduced the start symbol, that is also an accept criteria
if production.nonterminal == self.start_symbol {
rust!(
self.out,
"{index} => {p}state_machine::SimulatedReduce::Accept,",
index = index,
p = self.prefix,
);
} else {
let num_symbols = production.symbols.len();
let nt = self
.custom
.all_nonterminals
.iter()
.position(|x| *x == production.nonterminal)
.unwrap();
rust!(self.out, "{} => {{", index);
if DEBUG_PRINT {
rust!(
self.out,
"println!(r##\"accepts: simulating {:?}\"##);",
production
);
}
rust!(
self.out,
"{p}state_machine::SimulatedReduce::Reduce {{",
p = self.prefix,
);
rust!(
self.out,
"states_to_pop: {num_symbols},",
num_symbols = num_symbols,
);
rust!(self.out, "nonterminal_produced: {nt},", nt = nt);
rust!(self.out, "}}");
rust!(self.out, "}}");
}
}
rust!(
self.out,
"_ => panic!(\"invalid reduction index {{}}\", {}reduce_index)",
self.prefix,
);
rust!(self.out, "}}"); // end match
rust!(self.out, "}}");
Ok(())
}
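
A hedged toy of the match this function emits, for a hypothetical grammar where production 0 reduces the start symbol and production 1 has three symbols producing nonterminal index 0:

```
// Toy stand-in for the generated state machine's SimulatedReduce type.
enum SimulatedReduce {
    Reduce {
        states_to_pop: usize,
        nonterminal_produced: usize,
    },
    // Reducing the start production means the parse is done.
    Accept,
}

fn simulate_reduce(reduce_index: u32) -> SimulatedReduce {
    match reduce_index {
        0 => SimulatedReduce::Accept,
        1 => SimulatedReduce::Reduce {
            states_to_pop: 3,
            nonterminal_produced: 0,
        },
        _ => panic!("invalid reduction index {}", reduce_index),
    }
}
```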
/// The `accepts` function
///
/// ```ignore
@ -1918,6 +2105,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
return Ok(());
}
let phantom_data_expr = self.phantom_data_expr();
let actions_per_state = self.grammar.terminals.all.len();
let parameters = vec![
format!(
@ -2020,54 +2208,23 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
// effect on the state stack.
rust!(
self.out,
"let ({p}to_pop, {p}nt) = match -{p}action {{",
p = self.prefix
"let ({p}to_pop, {p}nt) = match {p}simulate_reduce(-({p}action + 1), {pde}) {{",
p = self.prefix,
pde = phantom_data_expr,
);
for (production, index) in self
.grammar
.nonterminals
.values()
.flat_map(|nt| &nt.productions)
.zip(1..)
{
if Tls::session().emit_comments {
rust!(self.out, "// simulate {:?}", production);
}
// if we just reduced the start symbol, that is also an accept criteria
if production.nonterminal == self.start_symbol {
rust!(self.out, "{} => return true,", index);
} else {
let num_symbols = production.symbols.len();
let nt = self
.custom
.all_nonterminals
.iter()
.position(|x| *x == production.nonterminal)
.unwrap();
rust!(self.out, "{} => {{", index);
if DEBUG_PRINT {
rust!(
self.out,
"println!(r##\"accepts: simulating {:?}\"##);",
production
"{p}state_machine::SimulatedReduce::Reduce {{",
p = self.prefix,
);
}
rust!(self.out, "states_to_pop, nonterminal_produced",);
rust!(self.out, "}} => (states_to_pop, nonterminal_produced),",);
rust!(
self.out,
"({num_symbols}, {nt})",
num_symbols = num_symbols,
nt = nt
"{p}state_machine::SimulatedReduce::Accept => return true,",
p = self.prefix,
);
rust!(self.out, "}}");
}
}
rust!(
self.out,
"_ => panic!(\"invalid action code {{}}\", {}action)",
self.prefix
);
rust!(self.out, "}};"); // end match
rust!(self.out, "}};");
rust!(self.out, "{p}states_len -= {p}to_pop;", p = self.prefix);
rust!(
@ -2110,9 +2267,9 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
fn symbol_type(&self) -> String {
format!(
"{}Symbol<{}>",
self.prefix,
Sep(", ", &self.custom.symbol_type_params)
"{p}Symbol<{stp}>",
p = self.prefix,
stp = Sep(", ", &self.custom.symbol_type_params),
)
}