perf(parse_table): Avoid generating unused rows in the matrix

Only the states at the beginning of a reduction are actually used; the
rows for all other states will never be accessed (really, no zero elements
will be accessed in the GOTO table, so we might be able to do even better
without compromising performance).

By reordering the states so that the states needed in the GOTO table
occupy the lower indices, we can simply avoid generating the remaining
part of the GOTO table.

For the LALRPOP parser itself this is a reduction of about 40% (~100kB)
in the size of the parse tables (not counting the generated code).
This commit is contained in:
Markus Westerlind 2020-04-30 00:58:11 +02:00
parent cb1924632b
commit 688b91930a
8 changed files with 1339 additions and 1961 deletions

View File

@ -376,7 +376,7 @@ fn emit_recursive_ascent(
lr1::generate_report(&mut output_report_file, &lr1result)?;
}
let mut states = match lr1result {
let states = match lr1result {
Ok(states) => states,
Err(error) => {
let messages = lr1::report_error(&grammar, &error);
@ -385,34 +385,6 @@ fn emit_recursive_ascent(
}
};
let mut start_states = vec![false; states.len()];
for (index, state) in states.iter_mut().enumerate() {
debug_assert!(state.index.0 == index);
if grammar
.nonterminals
.keys()
.any(|nonterminal| state.gotos.get(&nonterminal).is_some())
{
start_states[index] = true;
}
}
states.sort_by_key(|state| start_states[state.index.0]);
let mut state_rewrite = vec![0; states.len()];
for (new_index, state) in states.iter_mut().enumerate() {
state_rewrite[state.index.0] = new_index;
state.index.0 = new_index;
}
for state in &mut states {
for goto in state.gotos.values_mut() {
goto.0 = state_rewrite[goto.0];
}
for shift in state.shifts.values_mut() {
shift.0 = state_rewrite[shift.0];
}
}
match grammar.algorithm.codegen {
r::LrCodeGeneration::RecursiveAscent => lr1::codegen::ascent::compile(
&grammar,

View File

@ -7,9 +7,8 @@ use crate::lr1::core::*;
use crate::lr1::first;
use crate::lr1::lane_table::*;
use crate::lr1::lookahead::*;
use std::env;
use std::rc::Rc;
use crate::tls::Tls;
use std::env;
#[cfg(test)]
mod test;
@ -254,9 +253,7 @@ impl<'grammar, L: LookaheadBuild> LR<'grammar, L> {
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
.collect();
Items {
vec: Rc::new(final_items),
}
Items { vec: final_items }
}
}

View File

@ -2,13 +2,12 @@
use crate::collections::{map, Map, Multimap};
use crate::grammar::repr::*;
use itertools::Itertools;
use crate::lr1::build;
use crate::lr1::core::*;
use crate::lr1::lookahead::*;
use std::mem;
use std::rc::Rc;
use crate::tls::Tls;
use itertools::Itertools;
use std::mem;
#[cfg(test)]
mod test;
@ -39,11 +38,13 @@ pub fn build_lalr_states<'grammar>(
return Ok(lr_states);
}
profile! {
let lr1_states = profile! {
&Tls::session(),
"LALR(1) state collapse",
collapse_to_lalr_states(&lr_states)
}
}?;
Ok(lr1_states)
}
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR1Result<'grammar> {
@ -138,9 +139,7 @@ pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR
.into_iter()
.map(|lr| State {
index: lr.index,
items: Items {
vec: Rc::new(lr.items),
},
items: Items { vec: lr.items },
shifts: lr.shifts,
reductions: lr.reductions.into_iter().map(|(p, ts)| (ts, p)).collect(),
gotos: lr.gotos,

View File

@ -527,9 +527,11 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
self.prefix,
self.custom.state_type
);
let mut row = Vec::new();
for (index, state) in self.states.iter().enumerate() {
rust!(self.out, "// State {}", index);
let iterator = self.grammar.nonterminals.keys().map(|nonterminal| {
row.extend(self.grammar.nonterminals.keys().map(|nonterminal| {
if let Some(&new_state) = state.gotos.get(&nonterminal) {
(
new_state.0 as i32 + 1,
@ -538,8 +540,12 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
} else {
(0, Comment::Error(nonterminal))
}
});
self.out.write_table_row(iterator)?;
}));
// The remaining rows are all errors and are never accessed, so we may omit them from the table
if row.iter().all(|t| t.0 == 0) {
break;
}
self.out.write_table_row(row.drain(..))?;
}
rust!(self.out, "];");

View File

@ -5,7 +5,6 @@ use crate::grammar::repr::*;
use crate::util::Prefix;
use itertools::Itertools;
use std::fmt::{Debug, Display, Error, Formatter};
use std::rc::Rc;
use super::lookahead::*;
@ -123,7 +122,7 @@ pub struct StateIndex(pub usize);
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Items<'grammar, L: Lookahead> {
pub vec: Rc<Vec<Item<'grammar, L>>>,
pub vec: Vec<Item<'grammar, L>>,
}
#[allow(dead_code)]

View File

@ -1,7 +1,6 @@
//!
use crate::collections::{Map, Set};
use ena::unify::InPlaceUnificationTable;
use crate::grammar::repr::*;
use crate::lr1::build;
use crate::lr1::core::*;
@ -11,7 +10,7 @@ use crate::lr1::lane_table::table::context_set::OverlappingLookahead;
use crate::lr1::lane_table::table::{ConflictIndex, LaneTable};
use crate::lr1::lookahead::{Lookahead, TokenSet};
use crate::lr1::state_graph::StateGraph;
use std::rc::Rc;
use ena::unify::InPlaceUnificationTable;
mod merge;
use self::merge::Merge;
@ -120,9 +119,7 @@ impl<'grammar> LaneTableConstruct<'grammar> {
.collect();
State {
index: s.index,
items: Items {
vec: Rc::new(items),
},
items: Items { vec: items },
shifts: s.shifts,
reductions,
gotos: s.gotos,

View File

@ -28,11 +28,15 @@ pub fn build_states<'grammar>(
grammar: &'grammar Grammar,
start: NonterminalString,
) -> LR1Result<'grammar> {
if !grammar.algorithm.lalr {
build::build_lr1_states(grammar, start)
let mut lr1_states = if !grammar.algorithm.lalr {
build::build_lr1_states(grammar, start)?
} else {
build_lalr::build_lalr_states(grammar, start)
}
build_lalr::build_lalr_states(grammar, start)?
};
rewrite_state_indices(grammar, &mut lr1_states);
Ok(lr1_states)
}
pub fn generate_report<'grammar, W: Write + 'grammar>(
@ -41,3 +45,37 @@ pub fn generate_report<'grammar, W: Write + 'grammar>(
) -> io::Result<()> {
report::generate_report(out, lr1result)
}
/// Renumbers `states` so that every state with at least one goto comes first.
///
/// By packing all states which start a reduction (i.e. states that have a goto on some
/// nonterminal of `grammar`) into the lowest indices we can generate a smaller goto table, as
/// any state not starting a reduction will not need a row.
///
/// On entry each state's `index` must equal its position in `states` (checked in debug
/// builds). On return the slice is reordered, each `index` again equals the state's position,
/// and every goto and shift target has been rewritten to the new numbering.
fn rewrite_state_indices(grammar: &Grammar, states: &mut [core::LR1State]) {
    // `has_goto[i]` records whether the state at (old) index `i` has a goto on any nonterminal.
    let has_goto: Vec<bool> = states
        .iter()
        .enumerate()
        .map(|(index, state)| {
            // The lookup tables below are indexed by `state.index`, so it must match the position.
            debug_assert_eq!(state.index.0, index);
            grammar
                .nonterminals
                .keys()
                .any(|nonterminal| state.gotos.contains_key(nonterminal))
        })
        .collect();

    // `false` orders before `true`, so negating the flag puts the states with gotos first.
    // Since the sort is stable, states keep their relative order within each group, so the
    // initial state is still 0 (assuming the initial state has at least one goto).
    states.sort_by_key(|state| !has_goto[state.index.0]);

    // Record where each old index ended up and renumber the states to match their new position.
    let mut state_rewrite = vec![0; states.len()];
    for (new_index, state) in states.iter_mut().enumerate() {
        state_rewrite[state.index.0] = new_index;
        state.index.0 = new_index;
    }

    // Point every goto and shift transition at the renumbered target state.
    for state in states {
        for target in state.gotos.values_mut().chain(state.shifts.values_mut()) {
            target.0 = state_rewrite[target.0];
        }
    }
}

File diff suppressed because it is too large Load Diff