perf: Emit the GOTO table as nested matches

Since most of the GOTO table consists of entries that will never be hit,
we can shrink the binary size by emitting it as nested matches instead,
without losing any performance.
This commit is contained in:
Markus Westerlind 2020-04-30 17:46:39 +02:00
parent 688b91930a
commit c5070af2ee
2 changed files with 2552 additions and 1800 deletions

View File

@ -246,12 +246,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
"fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{", "fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{",
state_type = state_type state_type = state_type
); );
rust!( rust!(self.out, "{p}action(state, integer)", p = self.prefix);
self.out,
"{p}ACTION[(state as usize) * {num_term} + integer]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}"); rust!(self.out, "}}");
rust!(self.out, ""); rust!(self.out, "");
@ -263,7 +258,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
); );
rust!( rust!(
self.out, self.out,
"{p}ACTION[(state as usize) * {num_term} + ({num_term} - 1)]", "{p}action(state, {num_term} - 1)",
p = self.prefix, p = self.prefix,
num_term = self.grammar.terminals.all.len(), num_term = self.grammar.terminals.all.len(),
); );
@ -308,11 +303,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
"fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{", "fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{",
state_type = state_type, state_type = state_type,
); );
rust!( rust!(self.out, "{p}expected_tokens(state)", p = self.prefix);
self.out,
"{p}expected_tokens(state as usize)",
p = self.prefix
);
rust!(self.out, "}}"); rust!(self.out, "}}");
rust!(self.out, ""); rust!(self.out, "");
@ -471,13 +462,15 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
} }
fn write_parse_table(&mut self) -> io::Result<()> { fn write_parse_table(&mut self) -> io::Result<()> {
let state_type = self.custom.state_type;
// The table is a two-dimensional matrix indexed first by state // The table is a two-dimensional matrix indexed first by state
// and then by the terminal index. The value is described above. // and then by the terminal index. The value is described above.
rust!( rust!(
self.out, self.out,
"const {}ACTION: &[{}] = &[", "const {}ACTION: &[{}] = &[",
self.prefix, self.prefix,
self.custom.state_type state_type
); );
for (index, state) in self.states.iter().enumerate() { for (index, state) in self.states.iter().enumerate() {
@ -506,6 +499,22 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
rust!(self.out, "];"); rust!(self.out, "];");
rust!(
self.out,
"fn {p}action(state: {state_type}, integer: usize) -> {state_type} {{",
p = self.prefix,
state_type = state_type,
);
rust!(
self.out,
"{p}ACTION[(state as usize) * {num_term} + integer]",
p = self.prefix,
num_term = self.grammar.terminals.all.len(),
);
rust!(self.out, "}}");
// Actions on EOF. Indexed just by state. // Actions on EOF. Indexed just by state.
rust!( rust!(
self.out, self.out,
@ -520,19 +529,21 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
} }
rust!(self.out, "];"); rust!(self.out, "];");
// The goto table is indexed by state and *nonterminal*.
rust!( rust!(
self.out, self.out,
"const {}GOTO: &[{}] = &[", "fn goto(state: {state_type}, nt: usize) -> {state_type} {{",
self.prefix, state_type = state_type,
self.custom.state_type
); );
let mut row = Vec::new(); rust!(self.out, "let next_state = {{");
for (index, state) in self.states.iter().enumerate() { Self::emit_lookup(
rust!(self.out, "// State {}", index); self.out,
row.extend(self.grammar.nonterminals.keys().map(|nonterminal| { "nt",
if let Some(&new_state) = state.gotos.get(&nonterminal) { self.grammar.nonterminals.keys(),
"state",
self.states.iter(),
|nonterminal, state| {
if let Some(&new_state) = state.gotos.get(nonterminal) {
( (
new_state.0 as i32 + 1, new_state.0 as i32 + 1,
Comment::Goto(nonterminal, new_state.0), Comment::Goto(nonterminal, new_state.0),
@ -540,27 +551,12 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
} else { } else {
(0, Comment::Error(nonterminal)) (0, Comment::Error(nonterminal))
} }
})); },
// The remaining rows will be all error and is never accessed so we may omit them from the table None,
if row.iter().all(|t| t.0 == 0) { )?;
break; rust!(self.out, "}};");
} rust!(self.out, "next_state - 1");
self.out.write_table_row(row.drain(..))?;
}
rust!(self.out, "];");
let state_type = self.custom.state_type;
rust!(
self.out,
"fn goto(state: {state_type}, nt: usize) -> {state_type} {{",
state_type = state_type,
);
rust!(
self.out,
"{}GOTO[(state as usize) * {} + nt] - 1",
self.prefix,
self.grammar.nonterminals.len(),
);
rust!(self.out, "}}"); rust!(self.out, "}}");
self.emit_expected_tokens_fn()?; self.emit_expected_tokens_fn()?;
@ -568,6 +564,95 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
Ok(()) Ok(())
} }
/// Writes a two-level nested `match` expression to `out` that encodes a
/// sparse two-dimensional lookup table.
///
/// The outer `match` is on `k_name` and has one arm per item of `iter`,
/// keyed by that item's enumeration index. The inner `match` is on
/// `k2_name` and is keyed by the enumeration index of the items of `iter2`.
/// The value of each cell is the `i32` returned by `state_lookup(k, k2)`;
/// the `Comment` half of that tuple is not written to the output here.
///
/// Parameters:
/// - `out`: destination for the generated code.
/// - `k_name`: text of the expression scrutinised by the outer `match`.
/// - `iter`: outer keys; an item's position becomes its arm pattern.
/// - `k2_name`: text of the expression scrutinised by the inner `match`.
/// - `iter2`: inner keys; cloned and re-iterated once per outer key.
/// - `state_lookup`: computes the cell value for an (outer, inner) pair.
///   A value of `0` is treated as the "error" entry and is never given an
///   explicit inner arm.
/// - `fallback`: when `Some`, every non-zero value gets explicit inner arms
///   and the inner `_` arm yields this value. When `None`, the value that
///   covers the most index ranges is emitted as the inner `_` arm instead.
///
/// Rows whose cells are all `0` produce no outer arm and fall through to the
/// trailing `_ => 0` arm.
///
/// Returns `Ok(())` once the expression has been emitted.
/// NOTE(review): write failures are presumably propagated by the `rust!`
/// macro, which is defined outside this block -- confirm.
/// NOTE(review): if `iter2` yields no items while `fallback` is `None`,
/// `variants` is empty and `variants[largest_variant_index]` below would
/// panic; callers appear to always pass a non-empty state list -- confirm.
fn emit_lookup<'a, 'k, K: 'k, K2: 'k, T>(
out: &mut RustWrite<W>,
k_name: &str,
iter: impl IntoIterator<Item = &'k K>,
k2_name: &str,
iter2: impl IntoIterator<Item = &'k K2> + Clone,
mut state_lookup: impl FnMut(&'k K, &'k K2) -> (i32, Comment<'a, T>),
fallback: Option<i32>,
) -> io::Result<()> {
rust!(out, "match {} {{", k_name);
for (k_index, k) in iter.into_iter().enumerate() {
// Compute the full row for this outer key, tagging every cell with its
// inner index.
let iter = iter2
.clone()
.into_iter()
.map(|k2| state_lookup(k, k2))
.enumerate()
// Group consecutive indices that map to the same value into runs
.group_by(|(_, (next_state, _))| *next_state);
// Collect the `(value, run)` pairs so they can be inspected and sorted.
let mut row = Vec::new();
row.extend(&iter);
// A single run with value 0 means the whole row is error entries, so no
// arm is emitted for this outer key.
if row.len() == 1 && row[0].0 == 0 {
continue;
}
// Bring runs with equal values next to each other. `sort_by_key` is a
// stable sort, so runs of the same value stay in ascending index order.
row.sort_by_key(|(next_state, _)| *next_state);
// Position within `variants`, and range count, of the value that is
// covered by the most ranges seen so far.
let mut largest_variant_index = 0;
let mut largest_variant = 0;
// Group by next_state: each variant is `(value, ranges)`, where a range
// is `(start, Some(end))`, or `(start, None)` for a single index.
let variants: Vec<_> = (&row
.drain(..)
// We always emit a catch-all for 0 error states (which will never be hit)
.filter(|(next_state, _)| *next_state != 0)
.group_by(|(next_state, _)| *next_state))
.into_iter()
.enumerate()
.map(|(i, (next_state, group_group))| {
let vec = group_group
.map(|(_, mut group)| {
// First index of the run; `last()` on the remainder is `None`
// when the run holds exactly one index.
let (start, _) = group.next().unwrap();
(start, group.last().map(|(end, _)| end))
})
.collect::<Vec<_>>();
// Strict comparison: on a tie the earlier variant is kept.
if vec.len() > largest_variant {
largest_variant_index = i;
largest_variant = vec.len();
}
(next_state, vec)
})
.collect();
if fallback.is_none() && variants.len() == 1 {
// Only one non-error value in this row: emit it directly without
// looking at the inner key at all.
rust!(out, "{} => {},", k_index, variants[0].0);
} else {
rust!(out, "{} => match {} {{", k_index, k2_name);
for (i, (next_state, ranges)) in variants.iter().enumerate() {
// Without an explicit fallback the largest variant becomes the `_`
// arm below, so its ranges are not spelled out.
if fallback.is_none() && i == largest_variant_index {
continue;
}
// One arm per value, with its ranges joined as `a | b..=c` patterns.
rust!(
out,
"{} => {},",
ranges
.iter()
.format_with(" | ", |(start, end), f| match end {
None => f(&format_args!("{}", start)),
Some(end) => f(&format_args!("{}..={}", start, end)),
}),
next_state,
);
}
match fallback {
Some(fallback) => rust!(out, "_ => {},", fallback), // unreachable
None => rust!(out, "_ => {},", variants[largest_variant_index].0),
}
rust!(out, "}},");
}
}
// Outer catch-all: covers the all-error rows skipped above.
rust!(out, "_ => 0,"); // unreachable
rust!(out, "}}");
Ok(())
}
fn write_reduction<'s>( fn write_reduction<'s>(
custom: &TableDriven<'grammar>, custom: &TableDriven<'grammar>,
state: &'s LR1State, state: &'s LR1State,
@ -1264,7 +1349,6 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
} }
let phantom_data_expr = self.phantom_data_expr(); let phantom_data_expr = self.phantom_data_expr();
let actions_per_state = self.grammar.terminals.all.len();
let parameters = vec![ let parameters = vec![
format!( format!(
"{p}error_state: {typ}", "{p}error_state: {typ}",
@ -1316,7 +1400,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
rust!( rust!(
self.out, self.out,
"let {p}top = {p}states[{p}states_len - 1] as usize;", "let {p}top = {p}states[{p}states_len - 1];",
p = self.prefix p = self.prefix
); );
@ -1340,9 +1424,8 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
); );
rust!( rust!(
self.out, self.out,
"Some({p}integer) => {p}ACTION[{p}top * {actions_per_state} + {p}integer],", "Some({p}integer) => {p}action({p}top, {p}integer),",
p = self.prefix, p = self.prefix,
actions_per_state = actions_per_state,
); );
rust!(self.out, "}};"); // end `match` rust!(self.out, "}};"); // end `match`
@ -1436,9 +1519,9 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
fn emit_expected_tokens_fn(&mut self) -> io::Result<()> { fn emit_expected_tokens_fn(&mut self) -> io::Result<()> {
rust!( rust!(
self.out, self.out,
"fn {}expected_tokens({}state: usize) -> Vec<::std::string::String> {{", "fn {p}expected_tokens({p}state: {}) -> Vec<::std::string::String> {{",
self.prefix, self.custom.state_type,
self.prefix p = self.prefix
); );
rust!(self.out, "const {}TERMINAL: &[&str] = &[", self.prefix); rust!(self.out, "const {}TERMINAL: &[&str] = &[", self.prefix);
@ -1458,13 +1541,15 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
// Grab any terminals in the current state which would have resulted in a successful parse // Grab any terminals in the current state which would have resulted in a successful parse
rust!( rust!(
self.out, self.out,
"{}ACTION[({}state * {})..].iter().zip({}TERMINAL).filter_map(|(&state, terminal)| {{", "{}TERMINAL.iter().enumerate().filter_map(|(index, terminal)| {{",
self.prefix, self.prefix,
self.prefix,
self.grammar.terminals.all.len(),
self.prefix
); );
rust!(self.out, "if state == 0 {{"); rust!(
self.out,
"let next_state = {p}action({p}state, index);",
p = self.prefix
);
rust!(self.out, "if next_state == 0 {{");
rust!(self.out, "None"); rust!(self.out, "None");
rust!(self.out, "}} else {{"); rust!(self.out, "}} else {{");
rust!(self.out, "Some(terminal.to_string())"); rust!(self.out, "Some(terminal.to_string())");

File diff suppressed because it is too large Load Diff