mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-04-25 03:02:15 +00:00
perf: Emit the GOTO table as nested matches
Since most of the goto table consists of elements that will never be hit, we can shrink the binary size by emitting it as nested matches instead, without losing any performance.
This commit is contained in:
parent
688b91930a
commit
c5070af2ee
@ -246,12 +246,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
"fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{",
|
"fn action(&self, state: {state_type}, integer: usize) -> {state_type} {{",
|
||||||
state_type = state_type
|
state_type = state_type
|
||||||
);
|
);
|
||||||
rust!(
|
rust!(self.out, "{p}action(state, integer)", p = self.prefix);
|
||||||
self.out,
|
|
||||||
"{p}ACTION[(state as usize) * {num_term} + integer]",
|
|
||||||
p = self.prefix,
|
|
||||||
num_term = self.grammar.terminals.all.len(),
|
|
||||||
);
|
|
||||||
rust!(self.out, "}}");
|
rust!(self.out, "}}");
|
||||||
|
|
||||||
rust!(self.out, "");
|
rust!(self.out, "");
|
||||||
@ -263,7 +258,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
);
|
);
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"{p}ACTION[(state as usize) * {num_term} + ({num_term} - 1)]",
|
"{p}action(state, {num_term} - 1)",
|
||||||
p = self.prefix,
|
p = self.prefix,
|
||||||
num_term = self.grammar.terminals.all.len(),
|
num_term = self.grammar.terminals.all.len(),
|
||||||
);
|
);
|
||||||
@ -308,11 +303,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
"fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{",
|
"fn expected_tokens(&self, state: {state_type}) -> Vec<String> {{",
|
||||||
state_type = state_type,
|
state_type = state_type,
|
||||||
);
|
);
|
||||||
rust!(
|
rust!(self.out, "{p}expected_tokens(state)", p = self.prefix);
|
||||||
self.out,
|
|
||||||
"{p}expected_tokens(state as usize)",
|
|
||||||
p = self.prefix
|
|
||||||
);
|
|
||||||
rust!(self.out, "}}");
|
rust!(self.out, "}}");
|
||||||
|
|
||||||
rust!(self.out, "");
|
rust!(self.out, "");
|
||||||
@ -471,13 +462,15 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn write_parse_table(&mut self) -> io::Result<()> {
|
fn write_parse_table(&mut self) -> io::Result<()> {
|
||||||
|
let state_type = self.custom.state_type;
|
||||||
|
|
||||||
// The table is a two-dimensional matrix indexed first by state
|
// The table is a two-dimensional matrix indexed first by state
|
||||||
// and then by the terminal index. The value is described above.
|
// and then by the terminal index. The value is described above.
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"const {}ACTION: &[{}] = &[",
|
"const {}ACTION: &[{}] = &[",
|
||||||
self.prefix,
|
self.prefix,
|
||||||
self.custom.state_type
|
state_type
|
||||||
);
|
);
|
||||||
|
|
||||||
for (index, state) in self.states.iter().enumerate() {
|
for (index, state) in self.states.iter().enumerate() {
|
||||||
@ -506,6 +499,22 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
|
|
||||||
rust!(self.out, "];");
|
rust!(self.out, "];");
|
||||||
|
|
||||||
|
rust!(
|
||||||
|
self.out,
|
||||||
|
"fn {p}action(state: {state_type}, integer: usize) -> {state_type} {{",
|
||||||
|
p = self.prefix,
|
||||||
|
state_type = state_type,
|
||||||
|
);
|
||||||
|
|
||||||
|
rust!(
|
||||||
|
self.out,
|
||||||
|
"{p}ACTION[(state as usize) * {num_term} + integer]",
|
||||||
|
p = self.prefix,
|
||||||
|
num_term = self.grammar.terminals.all.len(),
|
||||||
|
);
|
||||||
|
|
||||||
|
rust!(self.out, "}}");
|
||||||
|
|
||||||
// Actions on EOF. Indexed just by state.
|
// Actions on EOF. Indexed just by state.
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
@ -520,19 +529,21 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
}
|
}
|
||||||
rust!(self.out, "];");
|
rust!(self.out, "];");
|
||||||
|
|
||||||
// The goto table is indexed by state and *nonterminal*.
|
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"const {}GOTO: &[{}] = &[",
|
"fn goto(state: {state_type}, nt: usize) -> {state_type} {{",
|
||||||
self.prefix,
|
state_type = state_type,
|
||||||
self.custom.state_type
|
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut row = Vec::new();
|
rust!(self.out, "let next_state = {{");
|
||||||
for (index, state) in self.states.iter().enumerate() {
|
Self::emit_lookup(
|
||||||
rust!(self.out, "// State {}", index);
|
self.out,
|
||||||
row.extend(self.grammar.nonterminals.keys().map(|nonterminal| {
|
"nt",
|
||||||
if let Some(&new_state) = state.gotos.get(&nonterminal) {
|
self.grammar.nonterminals.keys(),
|
||||||
|
"state",
|
||||||
|
self.states.iter(),
|
||||||
|
|nonterminal, state| {
|
||||||
|
if let Some(&new_state) = state.gotos.get(nonterminal) {
|
||||||
(
|
(
|
||||||
new_state.0 as i32 + 1,
|
new_state.0 as i32 + 1,
|
||||||
Comment::Goto(nonterminal, new_state.0),
|
Comment::Goto(nonterminal, new_state.0),
|
||||||
@ -540,27 +551,12 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
} else {
|
} else {
|
||||||
(0, Comment::Error(nonterminal))
|
(0, Comment::Error(nonterminal))
|
||||||
}
|
}
|
||||||
}));
|
},
|
||||||
// The remaining rows will be all error and is never accessed so we may omit them from the table
|
None,
|
||||||
if row.iter().all(|t| t.0 == 0) {
|
)?;
|
||||||
break;
|
rust!(self.out, "}};");
|
||||||
}
|
rust!(self.out, "next_state - 1");
|
||||||
self.out.write_table_row(row.drain(..))?;
|
|
||||||
}
|
|
||||||
rust!(self.out, "];");
|
|
||||||
|
|
||||||
let state_type = self.custom.state_type;
|
|
||||||
rust!(
|
|
||||||
self.out,
|
|
||||||
"fn goto(state: {state_type}, nt: usize) -> {state_type} {{",
|
|
||||||
state_type = state_type,
|
|
||||||
);
|
|
||||||
rust!(
|
|
||||||
self.out,
|
|
||||||
"{}GOTO[(state as usize) * {} + nt] - 1",
|
|
||||||
self.prefix,
|
|
||||||
self.grammar.nonterminals.len(),
|
|
||||||
);
|
|
||||||
rust!(self.out, "}}");
|
rust!(self.out, "}}");
|
||||||
|
|
||||||
self.emit_expected_tokens_fn()?;
|
self.emit_expected_tokens_fn()?;
|
||||||
@ -568,6 +564,95 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn emit_lookup<'a, 'k, K: 'k, K2: 'k, T>(
|
||||||
|
out: &mut RustWrite<W>,
|
||||||
|
k_name: &str,
|
||||||
|
iter: impl IntoIterator<Item = &'k K>,
|
||||||
|
k2_name: &str,
|
||||||
|
iter2: impl IntoIterator<Item = &'k K2> + Clone,
|
||||||
|
mut state_lookup: impl FnMut(&'k K, &'k K2) -> (i32, Comment<'a, T>),
|
||||||
|
fallback: Option<i32>,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
rust!(out, "match {} {{", k_name);
|
||||||
|
|
||||||
|
for (k_index, k) in iter.into_iter().enumerate() {
|
||||||
|
let iter = iter2
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|k2| state_lookup(k, k2))
|
||||||
|
.enumerate()
|
||||||
|
// Group consecutive indices
|
||||||
|
.group_by(|(_, (next_state, _))| *next_state);
|
||||||
|
let mut row = Vec::new();
|
||||||
|
row.extend(&iter);
|
||||||
|
|
||||||
|
if row.len() == 1 && row[0].0 == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
row.sort_by_key(|(next_state, _)| *next_state);
|
||||||
|
|
||||||
|
let mut largest_variant_index = 0;
|
||||||
|
let mut largest_variant = 0;
|
||||||
|
// Group by next_state
|
||||||
|
let variants: Vec<_> = (&row
|
||||||
|
.drain(..)
|
||||||
|
// We always emit a catch-all for 0 error states (which will never be hit)
|
||||||
|
.filter(|(next_state, _)| *next_state != 0)
|
||||||
|
.group_by(|(next_state, _)| *next_state))
|
||||||
|
.into_iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, (next_state, group_group))| {
|
||||||
|
let vec = group_group
|
||||||
|
.map(|(_, mut group)| {
|
||||||
|
let (start, _) = group.next().unwrap();
|
||||||
|
(start, group.last().map(|(end, _)| end))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
if vec.len() > largest_variant {
|
||||||
|
largest_variant_index = i;
|
||||||
|
largest_variant = vec.len();
|
||||||
|
}
|
||||||
|
(next_state, vec)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if fallback.is_none() && variants.len() == 1 {
|
||||||
|
rust!(out, "{} => {},", k_index, variants[0].0);
|
||||||
|
} else {
|
||||||
|
rust!(out, "{} => match {} {{", k_index, k2_name);
|
||||||
|
|
||||||
|
for (i, (next_state, ranges)) in variants.iter().enumerate() {
|
||||||
|
if fallback.is_none() && i == largest_variant_index {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rust!(
|
||||||
|
out,
|
||||||
|
"{} => {},",
|
||||||
|
ranges
|
||||||
|
.iter()
|
||||||
|
.format_with(" | ", |(start, end), f| match end {
|
||||||
|
None => f(&format_args!("{}", start)),
|
||||||
|
Some(end) => f(&format_args!("{}..={}", start, end)),
|
||||||
|
}),
|
||||||
|
next_state,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
match fallback {
|
||||||
|
Some(fallback) => rust!(out, "_ => {},", fallback), // unreachable
|
||||||
|
None => rust!(out, "_ => {},", variants[largest_variant_index].0),
|
||||||
|
}
|
||||||
|
rust!(out, "}},");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rust!(out, "_ => 0,"); // unreachable
|
||||||
|
rust!(out, "}}");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn write_reduction<'s>(
|
fn write_reduction<'s>(
|
||||||
custom: &TableDriven<'grammar>,
|
custom: &TableDriven<'grammar>,
|
||||||
state: &'s LR1State,
|
state: &'s LR1State,
|
||||||
@ -1264,7 +1349,6 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
}
|
}
|
||||||
|
|
||||||
let phantom_data_expr = self.phantom_data_expr();
|
let phantom_data_expr = self.phantom_data_expr();
|
||||||
let actions_per_state = self.grammar.terminals.all.len();
|
|
||||||
let parameters = vec![
|
let parameters = vec![
|
||||||
format!(
|
format!(
|
||||||
"{p}error_state: {typ}",
|
"{p}error_state: {typ}",
|
||||||
@ -1316,7 +1400,7 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
|
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"let {p}top = {p}states[{p}states_len - 1] as usize;",
|
"let {p}top = {p}states[{p}states_len - 1];",
|
||||||
p = self.prefix
|
p = self.prefix
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -1340,9 +1424,8 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
);
|
);
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"Some({p}integer) => {p}ACTION[{p}top * {actions_per_state} + {p}integer],",
|
"Some({p}integer) => {p}action({p}top, {p}integer),",
|
||||||
p = self.prefix,
|
p = self.prefix,
|
||||||
actions_per_state = actions_per_state,
|
|
||||||
);
|
);
|
||||||
rust!(self.out, "}};"); // end `match`
|
rust!(self.out, "}};"); // end `match`
|
||||||
|
|
||||||
@ -1436,9 +1519,9 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
fn emit_expected_tokens_fn(&mut self) -> io::Result<()> {
|
fn emit_expected_tokens_fn(&mut self) -> io::Result<()> {
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"fn {}expected_tokens({}state: usize) -> Vec<::std::string::String> {{",
|
"fn {p}expected_tokens({p}state: {}) -> Vec<::std::string::String> {{",
|
||||||
self.prefix,
|
self.custom.state_type,
|
||||||
self.prefix
|
p = self.prefix
|
||||||
);
|
);
|
||||||
|
|
||||||
rust!(self.out, "const {}TERMINAL: &[&str] = &[", self.prefix);
|
rust!(self.out, "const {}TERMINAL: &[&str] = &[", self.prefix);
|
||||||
@ -1458,13 +1541,15 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
// Grab any terminals in the current state which would have resulted in a successful parse
|
// Grab any terminals in the current state which would have resulted in a successful parse
|
||||||
rust!(
|
rust!(
|
||||||
self.out,
|
self.out,
|
||||||
"{}ACTION[({}state * {})..].iter().zip({}TERMINAL).filter_map(|(&state, terminal)| {{",
|
"{}TERMINAL.iter().enumerate().filter_map(|(index, terminal)| {{",
|
||||||
self.prefix,
|
self.prefix,
|
||||||
self.prefix,
|
|
||||||
self.grammar.terminals.all.len(),
|
|
||||||
self.prefix
|
|
||||||
);
|
);
|
||||||
rust!(self.out, "if state == 0 {{");
|
rust!(
|
||||||
|
self.out,
|
||||||
|
"let next_state = {p}action({p}state, index);",
|
||||||
|
p = self.prefix
|
||||||
|
);
|
||||||
|
rust!(self.out, "if next_state == 0 {{");
|
||||||
rust!(self.out, "None");
|
rust!(self.out, "None");
|
||||||
rust!(self.out, "}} else {{");
|
rust!(self.out, "}} else {{");
|
||||||
rust!(self.out, "Some(terminal.to_string())");
|
rust!(self.out, "Some(terminal.to_string())");
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user