generate state machine definition (but do not use to drive parsing)
This version however requires all lifetimes to be explicit. Suboptimal.
This commit is contained in: parent d0d6cc6bbb, commit 080611a725
@@ -5,6 +5,7 @@ fn main() {
.emit_comments(true)
.force_build(true)
.unit_test()
.log_debug()
.process_current_dir()
.unwrap();
}
@@ -1,7 +1,8 @@
use std::str::FromStr;
use associated_types_lib::ParseCallbacks;

grammar<P>(callbacks: &mut P) where P: ParseCallbacks;
// FIXME: the 'a shouldn't be needed
grammar<'a, P>(callbacks: &'a mut P) where P: ParseCallbacks, P: 'a;

pub Term: P::Term = {
<n:Num> => n.into(),
@@ -1,6 +1,7 @@
use std::str::FromStr;

grammar<F>(logger: &mut F) where F: for<'a> FnMut(&'a str);
// FIXME: 'logger shouldn't be needed
grammar<'logger, F>(logger: &'logger mut F) where F: for<'a> FnMut(&'a str) + 'logger;

pub Term: i32 = {
<n:Num> => {
@@ -147,9 +147,14 @@ pub trait ParserAction<D: ParserDefinition>: Copy + Clone + Debug {
fn is_error(self) -> bool;
}

pub struct SimulatedReduce<D: ParserDefinition> {
states_to_pop: usize,
nonterminal_produced: D::NonterminalIndex,
pub enum SimulatedReduce<D: ParserDefinition> {
Reduce {
states_to_pop: usize,
nonterminal_produced: D::NonterminalIndex,
},

// This reduce is the "start" fn, so the parse is done.
Accept,
}

// These aliases are an elaborate hack to get around

@@ -492,15 +497,22 @@ where
// If we encounter a reduce action, we need to simulate its
// effect on the state stack.
if let Some(reduce_action) = action.as_reduce() {
let SimulatedReduce {
states_to_pop,
nonterminal_produced,
} = self.definition.simulate_reduce(reduce_action);
states_len -= states_to_pop;
states.truncate(states_len);
let top = states[states_len - 1];
let next_state = self.definition.goto(top, nonterminal_produced);
states.push(next_state);
match self.definition.simulate_reduce(reduce_action) {
SimulatedReduce::Reduce {
states_to_pop,
nonterminal_produced,
} => {
states_len -= states_to_pop;
states.truncate(states_len);
let top = states[states_len - 1];
let next_state = self.definition.goto(top, nonterminal_produced);
states.push(next_state);
}

SimulatedReduce::Accept => {
return true;
}
}
} else {
// If we encounter a shift action, we DO accept.
assert!(action.is_shift());
@@ -152,24 +152,20 @@ impl<T: FreeVariables> FreeVariables for parse_tree::TypeBound<T> {
parse_tree::TypeBound::Lifetime(l) => free_lifetime(type_parameters, l),
parse_tree::TypeBound::Fn {
forall,
path,
path: _,
parameters,
ret,
} => path
.free_variables(type_parameters)
} => parameters.free_variables(type_parameters)
.into_iter()
.chain(parameters.free_variables(type_parameters))
.chain(ret.free_variables(type_parameters))
.filter(|tp| !forall.contains(tp))
.collect(),
parse_tree::TypeBound::Trait {
forall,
path,
path: _,
parameters,
} => path
.free_variables(type_parameters)
} => parameters.free_variables(type_parameters)
.into_iter()
.chain(parameters.free_variables(type_parameters))
.filter(|tp| !forall.contains(tp))
.collect(),
}
@@ -14,7 +14,7 @@ use util::Sep;
// These concepts we re-use wholesale
pub use grammar::parse_tree::{
Annotation, InternToken, Lifetime, NonterminalString, Path, Span, TerminalLiteral,
TerminalString, TypeBound, TypeBoundParameter, TypeParameter, Visibility,
TerminalString, TypeBound, TypeParameter, Visibility,
};

#[derive(Clone, Debug)]
@@ -347,10 +347,17 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> {
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_type(&self) -> String {
format!(
"::std::marker::PhantomData<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
)
let phantom_bits: Vec<_> = self
.grammar
.type_parameters
.iter()
.map(|tp| match *tp {
TypeParameter::Lifetime(ref l) => format!("&{} ()", l),

TypeParameter::Id(ref id) => id.to_string(),
})
.collect();
format!("::std::marker::PhantomData<({})>", Sep(", ", &phantom_bits),)
}

/// Returns expression that captures the user-declared type

@@ -358,9 +365,19 @@ impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> {
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_expr(&self) -> String {
let phantom_bits: Vec<_> = self
.grammar
.type_parameters
.iter()
.map(|tp| match *tp {
TypeParameter::Lifetime(_) => format!("&()"),

TypeParameter::Id(ref id) => id.to_string(),
})
.collect();
format!(
"::std::marker::PhantomData::<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
Sep(", ", &phantom_bits),
)
}
}
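Illustrative aside (not part of the diff): assuming a hypothetical grammar whose type parameters are `'input` and `T`, the two helpers above render lifetimes as reference types and pass type identifiers through unchanged, so they would produce roughly:

// phantom_data_type() -> "::std::marker::PhantomData<(&'input (), T)>"
// phantom_data_expr() -> "::std::marker::PhantomData::<(&(), T)>"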
@@ -34,222 +34,6 @@ pub fn compile<'grammar, W: Write>(
table_driven.write()
}

// We create three parse tables:
//
// - `ACTION[state * num_states + terminal]: i32`: given a state and next token,
// yields an integer indicating whether to shift/reduce (see below)
// - `EOF_ACTION[state]: i32`: as above, but for the EOF token
// - `GOTO[state * num_states + nonterminal]: i32`: index + 1 of state to jump to when given
// nonterminal is pushed (no error is possible)
//
// For the `ACTION` and `EOF_ACTION` tables, the value is an `i32` and
// its interpretation varies depending on whether it is positive or
// negative:
//
// - if zero, parse error.
// - if a positive integer (not zero), it is the next state to shift to.
// - if a negative integer (not zero), it is the index of a reduction
// action to execute (actually index + 1).
//
// We maintain two stacks: one is a stack of state indexes (each an
// u32). The other is a stack of values and spans: `(L, T, L)`. `L` is
// the location type and represents the start/end span. `T` is the
// value of the symbol. The type `T` is an `enum` that we synthesize
// which contains a variant for all the possibilities:
//
// ```
// enum Value<> {
// // One variant for each terminal:
// Term1(Ty1),
// ...
// TermN(TyN),
//
// // One variant for each nonterminal:
// Nt1(Ty1),
// ...
// NtN(TyN),
// }
// ```
//
// The action parser function looks like this (pseudo-code):
//
// ```
// fn parse_fn<TOKENS>(tokens: TOKENS) -> Result<T, Error>
// where TOKENS: Iterator<Item=Result<(Location, Token, Location), Error>>
// {
// let mut states = vec![0]; // initial state is zero
// let mut symbols = vec![];
// 'shift: loop {
// // Code to shift the next symbol and determine which terminal
// // it is; emitted by `shift_symbol()`.
// let lookahead = match tokens.next() {
// Some(Ok(l)) => l,
// None => break 'shift,
// Some(Err(e)) => return Err(e),
// };
// let integer = match lookahead {
// (_, PatternForTerminal0(...), _) => 0,
// ...
// };
//
// // Code to process next symbol.
// 'inner: loop {
// let symbol = match lookahead {
// (l, PatternForTerminal0(...), r) => {
// (l, Value::VariantForTerminal0(...), r),
// }
// ...
// };
// let state = *states.last().unwrap() as usize;
// let action = ACTION[state * NUM_STATES + integer];
// if action > 0 { // shift
// states.push(action - 1);
// symbols.push(symbol);
// continue 'shift;
// } else if action < 0 { // reduce
// if let Some(r) = reduce(action, Some(&lookahead.0), &mut states, &mut symbols) {
// // Give errors from within grammar a higher priority
// if r.is_err() {
// return r;
// }
// return Err(lalrpop_util::ParseError::ExtraToken { token: lookahead });
// }
// } else {
// // Error recovery code: emitted by `try_error_recovery`
// let mut err_lookahead = Some(lookahead);
// let mut err_integer = Some(integer);
// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location,
// &mut err_lookahead, &mut err_integer) {
// Err(e) => return e,
// Ok(Some(v)) => return Ok(v),
// Ok(None) => { }
// }
// match (err_lookahead, err_integer) {
// (Some(l), Some(i)) => {
// lookahead = l;
// integer = i;
// continue 'inner;
// }
// _ => break 'shift;
// }
// }
// }
// }
//
// // Process EOF
// while let Some(state) = self.states.pop() {
// let action = EOF_ACTION[state * NUM_STATES];
// if action < 0 { // reduce
// try!(reduce(action, None, &mut states, &mut symbols));
// } else {
// let mut err_lookahead = None;
// let mut err_integer = None;
// match error_recovery(&mut tokens, &mut states, &mut symbols, last_location,
// &mut err_lookahead, &mut err_integer) {
// Err(e) => return e,
// Ok(Some(v)) => return Ok(v),
// Ok(None) => { }
// }
// }
// }
// }
//
// // generated by `emit_reduce_actions()`
// fn reduce(action: i32, lookahead_start: Option<&L>,
// states: &mut Vec<i32>, symbols: &mut Vec<(L, Symbol, L))
// -> Option<Result<..>> {
// let nonterminal = match -action {
// 0 => {
// // Execute reduce action 0 to produce nonterminal N, popping from stacks etc
// // (generated by `emit_reduce_action()`). If this is a fallible action,
// // it may return `Some(Err)`, and if this is a reduce of the start NT,
// // it may return `Some(Ok)`.
// states.pop(); // however many times
// symbols.pop(); // however many times
// let data = action_fn0(...);
// symbols.push((l, Value::VariantForNonterminalN(data), r));
// N
// }
// ...
// };
// let state = *states.last().unwrap();
// let next_state = GOTO[state * NUM_STATES + nonterminal] - 1;
// state_stack.push(next_state);
// None
// }
//
// generated by `write_error_recovery_fn`
// fn error_recovery(...) {
// let mut dropped_tokens = vec![];
//
// // First, reduce as long as we can with the `!` token as lookahead
// loop {
// let state = *states.last().unwrap() as usize;
// let action = ACTION[(state + 1) * ACTIONS_PER_STATE - 1];
// if action >= 0 {
// break;
// }
// if let Some(r) = reduce(action, None, &mut states, &mut symbols) {
// return r;
// }
// }
//
// let top0;
// 'find_state: loop {
// // See if there is a state that can shift `!` token. If so,
// // break.
// for top in (0..states.len()).rev() {
// let state = states[top];
// let action = ACTION[state * ACTIONS_PER_STATE + 1];
// if action <= 0 { continue; }
// let error_state = action - 1;
// if accepts(error_state, &states[..top+1], *opt-integer) {
// top0 = top;
// break 'find_state;
// }
// }
//
// // Else, drop a token from the input and try again.
// 'eof: loop {
// match opt_lookahead.take() {
// None => {
// // No more tokens to drop
// return Err(...);
// }
// Some(mut lookahead) => {
// dropped_tokens.push(lookahead);
// next_token()
// opt_lookahead = Some(match tokens.next() {
// Some(Ok(l)) => l,
// None => break 'eof,
// Some(Err(e)) => return Err(e),
// });
// opt_integer = Some(match lookahead {
// (_, PatternForTerminal0(...), _) => 0,
// ...
// });
// continue 'find_state;
// }
// }
// }
// opt_lookahead = None;
// opt_integer = None;
// }
//
// let top = top0;
// let start = /* figure out "start" of error */;
// let end = /* figure out "end" of error */;
// states.truncate(top + 1);
// symbols.truncate(top);
// let recover_state = states[top];
// let error_state = ACTION[recover_state * ACTIONS_PER_STATE + 1] - 1;
// states.push(error_state);
// let recovery = ErrorRecovery { dropped_tokens, ... };
// symbols.push((start, Symbol::Termerror(recovery), end));
// Ok(None)
// }
// ```

enum Comment<'a, T> {
Goto(T, usize),
Error(T),
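Side note (a sketch, not code from this commit): the comment block above documents how `ACTION`/`EOF_ACTION` entries are encoded — zero means parse error, a positive value stores a shift target as state + 1, and a negative value stores a reduction as -(index + 1). A minimal, hypothetical decoder illustrating that encoding:

// Illustration only; the names are made up and this is not part of the generated parser.
enum DecodedAction {
    Error,
    Shift { next_state: usize },    // stored in the table as next_state + 1
    Reduce { reduce_index: usize }, // stored in the table as -(reduce_index + 1)
}

fn decode_action(action: i32) -> DecodedAction {
    if action > 0 {
        DecodedAction::Shift { next_state: (action - 1) as usize }
    } else if action < 0 {
        DecodedAction::Reduce { reduce_index: (-action - 1) as usize }
    } else {
        DecodedAction::Error
    }
}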
@@ -354,6 +138,10 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
self.write_parse_mod(|this| {
try!(this.write_value_type_defn());
try!(this.write_parse_table());
try!(this.write_machine_definition());
try!(this.write_token_to_integer_fn());
try!(this.write_token_to_symbol_fn());
try!(this.write_simulate_reduce_fn());
try!(this.write_parser_fn());
try!(this.write_error_recovery_fn());
try!(this.write_accepts_fn());

@@ -364,6 +152,241 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
})
}

fn write_machine_definition(&mut self) -> io::Result<()> {
let grammar_type_params = Sep(", ", &self.grammar.type_parameters);
let grammar_where_clauses = Sep(", ", &self.grammar.where_clauses);
// let parse_error_type = self.types.parse_error_type();
let error_type = self.types.error_type();
let token_type = self.types.terminal_token_type();
// let spanned_symbol_type = self.spanned_symbol_type();
// let triple_type = self.types.triple_type();
let loc_type = self.types.terminal_loc_type();
// let actions_per_state = self.grammar.terminals.all.len();
let start_type = self.types.nonterminal_type(&self.start_symbol);
let state_type = self.custom.state_type;
let symbol_type = self.symbol_type();
let phantom_data_type = self.phantom_data_type();
let phantom_data_expr = self.phantom_data_expr();

rust!(
self.out,
"pub struct {p}StateMachine<{gtp}>",
p = self.prefix,
gtp = grammar_type_params
);
rust!(self.out, "where {gwc}", gwc = grammar_where_clauses);
rust!(self.out, "{{");
for param in &self.grammar.parameters {
rust!(self.out, "{name}: {ty},", name = param.name, ty = param.ty,);
}
rust!(
self.out,
"{p}phantom: {phantom},",
p = self.prefix,
phantom = phantom_data_type,
);
rust!(self.out, "}}");

rust!(
self.out,
"impl<{gtp}> {p}state_machine::ParserDefinition for {p}StateMachine<{gtp}>",
p = self.prefix,
gtp = grammar_type_params,
);
rust!(self.out, "where {gwc}", gwc = grammar_where_clauses);
rust!(self.out, "{{");
rust!(self.out, "type Location = {t};", t = loc_type);
rust!(self.out, "type Error = {t};", t = error_type);
rust!(self.out, "type Token = {t};", t = token_type);
rust!(self.out, "type TokenIndex = usize;");
rust!(
self.out,
"type Symbol = {symbol_type};",
symbol_type = symbol_type,
);
rust!(self.out, "type Success = {t};", t = start_type);
rust!(self.out, "type StateIndex = {t};", t = state_type);
rust!(self.out, "type Action = {t};", t = state_type);
rust!(self.out, "type ReduceIndex = {t};", t = state_type);
rust!(self.out, "type NonterminalIndex = usize;");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn start_location(&self) -> Self::Location {{");
rust!(self.out, " Default::default()");
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn token_to_index(&self, token: &Self::Token) -> Option<usize> {{"
);
rust!(
self.out,
"{p}token_to_integer(token, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{",
state_type = state_type
);
rust!(
self.out,
"{p}ACTION[((state * {num_term}) as usize) + integer]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn error_action(&self, state: {state_type}) -> {state_type} {{",
state_type = state_type,
);
rust!(
self.out,
"{p}ACTION[((state * {num_term}) as usize) + {num_term}]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn eof_action(&self, state: {state_type}) -> {state_type} {{",
state_type = state_type,
);
rust!(self.out, "{p}EOF_ACTION[state as usize]", p = self.prefix,);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(
self.out,
"fn goto(&self, state: {state_type}, nt: usize) -> {state_type} {{",
state_type = state_type,
);
rust!(
self.out,
"{p}GOTO[(state * {num_non_term}) as usize + nt] - 1",
p = self.prefix,
num_non_term = self.grammar.nonterminals.len(),
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(
self.out,
"fn token_to_symbol(&self, token_index: usize, token: Self::Token) -> Self::Symbol {{"
);
rust!(
self.out,
"{p}token_to_symbol(token_index, token, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(
self.out,
"fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{",
state_type = state_type,
);
rust!(
self.out,
"{p}expected_tokens(state as usize)",
p = self.prefix
);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn uses_error_recovery(&self) -> bool {{");
rust!(self.out, "{}", self.grammar.uses_error_recovery);
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "#[inline]");
rust!(self.out, "fn error_recovery_symbol(");
rust!(self.out, "&self,");
rust!(
self.out,
"recovery: {p}state_machine::ErrorRecovery<Self>,",
p = self.prefix
);
rust!(self.out, ") -> Self::Symbol {{");
if self.grammar.uses_error_recovery {
let error_variant =
self.variant_name_for_symbol(&Symbol::Terminal(TerminalString::Error));
rust!(
self.out,
"{p}Symbol::{e}(recovery)",
p = self.prefix,
e = error_variant
);
} else {
rust!(
self.out,
"panic!(\"error recovery not enabled for this grammar\")"
)
}
rust!(self.out, "}}");

rust!(self.out, "");
rust!(self.out, "fn reduce(");
rust!(self.out, "&self,");
rust!(self.out, "action: {state_type},", state_type = state_type);
rust!(self.out, "start_location: Option<&Self::Location>,");
rust!(
self.out,
"states: &mut Vec<{state_type}>,",
state_type = state_type
);
rust!(
self.out,
"symbols: &mut Vec<{p}state_machine::SymbolTriple<Self>>,",
p = self.prefix,
);
rust!(
self.out,
") -> Option<{p}state_machine::ParseResult<Self>> {{",
p = self.prefix,
);
rust!(self.out, "panic!()");
rust!(self.out, "}}");

rust!(self.out, "");
rust!(
self.out,
"fn simulate_reduce(&self, action: {state_type}) -> {p}state_machine::SimulatedReduce<Self> {{",
p = self.prefix,
state_type = state_type,
);
rust!(
self.out,
"{p}simulate_reduce(action, {phantom})",
p = self.prefix,
phantom = phantom_data_expr,
);
rust!(self.out, "}}");

rust!(self.out, "}}");

Ok(())
}

fn write_value_type_defn(&mut self) -> io::Result<()> {
// sometimes some of the variants are not used, particularly
// if we are generating multiple parsers from the same file:

@@ -372,14 +395,14 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
self.out,
"pub enum {}Symbol<{}>",
self.prefix,
Sep(", ", &self.custom.symbol_type_params)
Sep(", ", &self.custom.symbol_type_params),
);

if !self.custom.symbol_where_clauses.is_empty() {
rust!(
self.out,
" where {}",
Sep(", ", &self.custom.symbol_where_clauses)
Sep(", ", &self.custom.symbol_where_clauses),
);
}

@@ -422,6 +445,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
.variant_names
.insert(Symbol::Nonterminal(nt.clone()), name.clone());
}

rust!(self.out, "}}");
Ok(())
}
@@ -766,14 +790,33 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}

fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> {
rust!(
self.out,
"{p}{integer} = match {p}{lookahead}.1 {{",
integer = integer,
lookahead = lookahead,
p = self.prefix
fn write_token_to_integer_fn(&mut self) -> io::Result<()> {
let token_type = self.types.terminal_token_type();

let parameters = vec![
format!(
"{p}token: &{token_type}",
p = self.prefix,
token_type = token_type,
),
format!("_: {}", self.phantom_data_type()),
];

try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}token_to_integer", p = self.prefix)
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(format!("Option<usize>"))
.emit()
);
rust!(self.out, "{{");

rust!(self.out, "match *{p}token {{", p = self.prefix);

for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) {
if *terminal == TerminalString::Error {
continue;

@@ -781,37 +824,78 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
let pattern = self.grammar.pattern(terminal).map(&mut |_| "_");
rust!(
self.out,
"{pattern} if true => {index},",
"{pattern} if true => Some({index}),",
pattern = pattern,
index = index
);
}

rust!(self.out, "_ => {{");
rust!(self.out, "_ => None,");

rust!(self.out, "}}");
rust!(self.out, "}}");

Ok(())
}

fn token_to_integer(&mut self, integer: &str, lookahead: &str) -> io::Result<()> {
let phantom_data_expr = self.phantom_data_expr();

rust!(
self.out,
"{p}{integer} = match {p}token_to_integer(&{p}{lookahead}.1, {pde}) {{",
p = self.prefix,
integer = integer,
lookahead = lookahead,
pde = phantom_data_expr,
);
rust!(self.out, "Some({p}i) => {p}i,", p = self.prefix,);
rust!(self.out, "None => {{",);
let prefix = self.prefix;
try!(self.let_unrecognized_token_error(
self.let_unrecognized_token_error(
"error",
&format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix)
));
rust!(self.out, "return Err({p}error);", p = self.prefix);
&format!("Some({p}{lookahead})", lookahead = lookahead, p = prefix),
)?;
rust!(self.out, "return Err({p}error);", p = prefix);
rust!(self.out, "}}");

rust!(self.out, "}};");
Ok(())
}

fn token_to_symbol(&mut self) -> io::Result<()> {
rust!(
self.out,
"let {}symbol = match {}integer {{",
self.prefix,
self.prefix
fn write_token_to_symbol_fn(&mut self) -> io::Result<()> {
let symbol_type = self.symbol_type();
let token_type = self.types.terminal_token_type();

let parameters = vec![
format!("{p}token_index: usize", p = self.prefix,),
format!(
"{p}token: {token_type}",
p = self.prefix,
token_type = token_type,
),
format!("_: {}", self.phantom_data_type()),
];

try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}token_to_symbol", p = self.prefix),
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(symbol_type)
.emit()
);
rust!(self.out, "{{");

rust!(self.out, "match {p}token_index {{", p = self.prefix,);
for (terminal, index) in self.grammar.terminals.all.iter().zip(0..) {
if *terminal == TerminalString::Error {
continue;
}
rust!(self.out, "{} => match {}lookahead.1 {{", index, self.prefix);
rust!(self.out, "{} => match {}token {{", index, self.prefix);

let mut pattern_names = vec![];
let pattern = self.grammar.pattern(terminal).map(&mut |_| {

@@ -829,11 +913,11 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
let variant_name = self.variant_name_for_symbol(&Symbol::Terminal(terminal.clone()));
rust!(
self.out,
"{} => {}Symbol::{}(({})),",
pattern,
self.prefix,
variant_name,
pattern_names.join(", ")
"{pattern} => {p}Symbol::{variant_name}(({pattern_names})),",
pattern = pattern,
p = self.prefix,
variant_name = variant_name,
pattern_names = pattern_names.join(", "),
);
rust!(self.out, "_ => unreachable!(),");
rust!(self.out, "}},");

@@ -841,7 +925,20 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive

rust!(self.out, "_ => unreachable!(),");

rust!(self.out, "}};");
rust!(self.out, "}}");
rust!(self.out, "}}");
Ok(())
}

fn token_to_symbol(&mut self) -> io::Result<()> {
let phantom_data_expr = self.phantom_data_expr();

rust!(
self.out,
"let {p}symbol = {p}token_to_symbol({p}integer, {p}lookahead.1, {pde});",
p = self.prefix,
pde = phantom_data_expr,
);
Ok(())
}

@@ -1164,7 +1261,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}

fn variant_name_for_symbol(&mut self, s: &Symbol) -> String {
fn variant_name_for_symbol(&self, s: &Symbol) -> String {
self.custom.variant_names[s].clone()
}
@@ -1886,6 +1983,96 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(())
}

fn write_simulate_reduce_fn(&mut self) -> io::Result<()> {
let state_type = self.custom.state_type;

let parameters = vec![
format!(
"{p}reduce_index: {state_type}",
p = self.prefix,
state_type = state_type,
),
format!("_: {}", self.phantom_data_type()),
];

try!(
self.out
.fn_header(
&Visibility::Priv,
format!("{p}simulate_reduce", p = self.prefix),
).with_type_parameters(&self.grammar.type_parameters)
.with_where_clauses(&self.grammar.where_clauses)
.with_parameters(parameters)
.with_return_type(format!(
"{p}state_machine::SimulatedReduce<{p}StateMachine<{gtp}>>",
p = self.prefix,
gtp = Sep(", ", &self.grammar.type_parameters),
)).emit()
);
rust!(self.out, "{{");

rust!(self.out, "match {p}reduce_index {{", p = self.prefix,);
for (production, index) in self
.grammar
.nonterminals
.values()
.flat_map(|nt| &nt.productions)
.zip(0..)
{
if Tls::session().emit_comments {
rust!(self.out, "// simulate {:?}", production);
}

// if we just reduced the start symbol, that is also an accept criteria
if production.nonterminal == self.start_symbol {
rust!(
self.out,
"{index} => {p}state_machine::SimulatedReduce::Accept,",
index = index,
p = self.prefix,
);
} else {
let num_symbols = production.symbols.len();
let nt = self
.custom
.all_nonterminals
.iter()
.position(|x| *x == production.nonterminal)
.unwrap();
rust!(self.out, "{} => {{", index);
if DEBUG_PRINT {
rust!(
self.out,
"println!(r##\"accepts: simulating {:?}\"##);",
production
);
}
rust!(
self.out,
"{p}state_machine::SimulatedReduce::Reduce {{",
p = self.prefix,
);
rust!(
self.out,
"states_to_pop: {num_symbols},",
num_symbols = num_symbols,
);
rust!(self.out, "nonterminal_produced: {nt},", nt = nt);
rust!(self.out, "}}");
rust!(self.out, "}}");
}
}
rust!(
self.out,
"_ => panic!(\"invalid reduction index {{}}\", {}reduce_index)",
self.prefix,
);
rust!(self.out, "}}"); // end match

rust!(self.out, "}}");
Ok(())
}

/// The `accepts` function
///
/// ```ignore
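Illustration only (hypothetical output, not captured from this commit): assuming the usual `__` prefix, an `i8` state type, a grammar with no type parameters, and a start production at reduce index 0, the generator above would emit a function along these lines:

// Hypothetical generated code; indices and types are made up for the example.
fn __simulate_reduce(__reduce_index: i8, _: ::std::marker::PhantomData<()>)
    -> __state_machine::SimulatedReduce<__StateMachine>
{
    match __reduce_index {
        0 => __state_machine::SimulatedReduce::Accept,
        1 => {
            __state_machine::SimulatedReduce::Reduce {
                states_to_pop: 3,
                nonterminal_produced: 2,
            }
        }
        _ => panic!("invalid reduction index {}", __reduce_index),
    }
}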
@@ -1918,6 +2105,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
return Ok(());
}

let phantom_data_expr = self.phantom_data_expr();
let actions_per_state = self.grammar.terminals.all.len();
let parameters = vec![
format!(

@@ -2020,54 +2208,23 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
// effect on the state stack.
rust!(
self.out,
"let ({p}to_pop, {p}nt) = match -{p}action {{",
p = self.prefix
"let ({p}to_pop, {p}nt) = match {p}simulate_reduce(-({p}action + 1), {pde}) {{",
p = self.prefix,
pde = phantom_data_expr,
);
for (production, index) in self
.grammar
.nonterminals
.values()
.flat_map(|nt| &nt.productions)
.zip(1..)
{
if Tls::session().emit_comments {
rust!(self.out, "// simulate {:?}", production);
}

// if we just reduced the start symbol, that is also an accept criteria
if production.nonterminal == self.start_symbol {
rust!(self.out, "{} => return true,", index);
} else {
let num_symbols = production.symbols.len();
let nt = self
.custom
.all_nonterminals
.iter()
.position(|x| *x == production.nonterminal)
.unwrap();
rust!(self.out, "{} => {{", index);
if DEBUG_PRINT {
rust!(
self.out,
"println!(r##\"accepts: simulating {:?}\"##);",
production
);
}
rust!(
self.out,
"({num_symbols}, {nt})",
num_symbols = num_symbols,
nt = nt
);
rust!(self.out, "}}");
}
}
rust!(
self.out,
"_ => panic!(\"invalid action code {{}}\", {}action)",
self.prefix
"{p}state_machine::SimulatedReduce::Reduce {{",
p = self.prefix,
);
rust!(self.out, "}};"); // end match
rust!(self.out, "states_to_pop, nonterminal_produced",);
rust!(self.out, "}} => (states_to_pop, nonterminal_produced),",);
rust!(
self.out,
"{p}state_machine::SimulatedReduce::Accept => return true,",
p = self.prefix,
);
rust!(self.out, "}};");

rust!(self.out, "{p}states_len -= {p}to_pop;", p = self.prefix);
rust!(

@@ -2110,9 +2267,9 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive

fn symbol_type(&self) -> String {
format!(
"{}Symbol<{}>",
self.prefix,
Sep(", ", &self.custom.symbol_type_params)
"{p}Symbol<{stp}>",
p = self.prefix,
stp = Sep(", ", &self.custom.symbol_type_params),
)
}