mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-04-25 11:12:14 +00:00
perf(parse_table): Avoid generating unused rows in the matrix
Only the states at the beginning of a reduction are actually used; the rows for all other states will never be used (in fact, the zero entries in the GOTO table are never accessed at all, so we might be able to do even better without compromising performance). By reordering the states so that the states needed in the GOTO table occupy the lower indices, we can simply avoid generating the remaining part of the GOTO table. For the LALRPOP parser itself this is a reduction of about 40% (~100kB) in the size of the parse tables (not counting the generated code).
This commit is contained in:
parent
cb1924632b
commit
688b91930a
@ -376,7 +376,7 @@ fn emit_recursive_ascent(
|
|||||||
lr1::generate_report(&mut output_report_file, &lr1result)?;
|
lr1::generate_report(&mut output_report_file, &lr1result)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut states = match lr1result {
|
let states = match lr1result {
|
||||||
Ok(states) => states,
|
Ok(states) => states,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
let messages = lr1::report_error(&grammar, &error);
|
let messages = lr1::report_error(&grammar, &error);
|
||||||
@ -385,34 +385,6 @@ fn emit_recursive_ascent(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut start_states = vec![false; states.len()];
|
|
||||||
for (index, state) in states.iter_mut().enumerate() {
|
|
||||||
debug_assert!(state.index.0 == index);
|
|
||||||
if grammar
|
|
||||||
.nonterminals
|
|
||||||
.keys()
|
|
||||||
.any(|nonterminal| state.gotos.get(&nonterminal).is_some())
|
|
||||||
{
|
|
||||||
start_states[index] = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
states.sort_by_key(|state| start_states[state.index.0]);
|
|
||||||
|
|
||||||
let mut state_rewrite = vec![0; states.len()];
|
|
||||||
for (new_index, state) in states.iter_mut().enumerate() {
|
|
||||||
state_rewrite[state.index.0] = new_index;
|
|
||||||
state.index.0 = new_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
for state in &mut states {
|
|
||||||
for goto in state.gotos.values_mut() {
|
|
||||||
goto.0 = state_rewrite[goto.0];
|
|
||||||
}
|
|
||||||
for shift in state.shifts.values_mut() {
|
|
||||||
shift.0 = state_rewrite[shift.0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
match grammar.algorithm.codegen {
|
match grammar.algorithm.codegen {
|
||||||
r::LrCodeGeneration::RecursiveAscent => lr1::codegen::ascent::compile(
|
r::LrCodeGeneration::RecursiveAscent => lr1::codegen::ascent::compile(
|
||||||
&grammar,
|
&grammar,
|
||||||
|
@ -7,9 +7,8 @@ use crate::lr1::core::*;
|
|||||||
use crate::lr1::first;
|
use crate::lr1::first;
|
||||||
use crate::lr1::lane_table::*;
|
use crate::lr1::lane_table::*;
|
||||||
use crate::lr1::lookahead::*;
|
use crate::lr1::lookahead::*;
|
||||||
use std::env;
|
|
||||||
use std::rc::Rc;
|
|
||||||
use crate::tls::Tls;
|
use crate::tls::Tls;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
@ -254,9 +253,7 @@ impl<'grammar, L: LookaheadBuild> LR<'grammar, L> {
|
|||||||
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
|
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Items {
|
Items { vec: final_items }
|
||||||
vec: Rc::new(final_items),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,13 +2,12 @@
|
|||||||
|
|
||||||
use crate::collections::{map, Map, Multimap};
|
use crate::collections::{map, Map, Multimap};
|
||||||
use crate::grammar::repr::*;
|
use crate::grammar::repr::*;
|
||||||
use itertools::Itertools;
|
|
||||||
use crate::lr1::build;
|
use crate::lr1::build;
|
||||||
use crate::lr1::core::*;
|
use crate::lr1::core::*;
|
||||||
use crate::lr1::lookahead::*;
|
use crate::lr1::lookahead::*;
|
||||||
use std::mem;
|
|
||||||
use std::rc::Rc;
|
|
||||||
use crate::tls::Tls;
|
use crate::tls::Tls;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
@ -39,11 +38,13 @@ pub fn build_lalr_states<'grammar>(
|
|||||||
return Ok(lr_states);
|
return Ok(lr_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
profile! {
|
let lr1_states = profile! {
|
||||||
&Tls::session(),
|
&Tls::session(),
|
||||||
"LALR(1) state collapse",
|
"LALR(1) state collapse",
|
||||||
collapse_to_lalr_states(&lr_states)
|
collapse_to_lalr_states(&lr_states)
|
||||||
}
|
}?;
|
||||||
|
|
||||||
|
Ok(lr1_states)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR1Result<'grammar> {
|
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR1Result<'grammar> {
|
||||||
@ -138,9 +139,7 @@ pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|lr| State {
|
.map(|lr| State {
|
||||||
index: lr.index,
|
index: lr.index,
|
||||||
items: Items {
|
items: Items { vec: lr.items },
|
||||||
vec: Rc::new(lr.items),
|
|
||||||
},
|
|
||||||
shifts: lr.shifts,
|
shifts: lr.shifts,
|
||||||
reductions: lr.reductions.into_iter().map(|(p, ts)| (ts, p)).collect(),
|
reductions: lr.reductions.into_iter().map(|(p, ts)| (ts, p)).collect(),
|
||||||
gotos: lr.gotos,
|
gotos: lr.gotos,
|
||||||
|
@ -527,9 +527,11 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
self.prefix,
|
self.prefix,
|
||||||
self.custom.state_type
|
self.custom.state_type
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let mut row = Vec::new();
|
||||||
for (index, state) in self.states.iter().enumerate() {
|
for (index, state) in self.states.iter().enumerate() {
|
||||||
rust!(self.out, "// State {}", index);
|
rust!(self.out, "// State {}", index);
|
||||||
let iterator = self.grammar.nonterminals.keys().map(|nonterminal| {
|
row.extend(self.grammar.nonterminals.keys().map(|nonterminal| {
|
||||||
if let Some(&new_state) = state.gotos.get(&nonterminal) {
|
if let Some(&new_state) = state.gotos.get(&nonterminal) {
|
||||||
(
|
(
|
||||||
new_state.0 as i32 + 1,
|
new_state.0 as i32 + 1,
|
||||||
@ -538,8 +540,12 @@ impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TableDrive
|
|||||||
} else {
|
} else {
|
||||||
(0, Comment::Error(nonterminal))
|
(0, Comment::Error(nonterminal))
|
||||||
}
|
}
|
||||||
});
|
}));
|
||||||
self.out.write_table_row(iterator)?;
|
// The remaining rows are all errors and are never accessed, so we may omit them from the table
|
||||||
|
if row.iter().all(|t| t.0 == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.out.write_table_row(row.drain(..))?;
|
||||||
}
|
}
|
||||||
rust!(self.out, "];");
|
rust!(self.out, "];");
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ use crate::grammar::repr::*;
|
|||||||
use crate::util::Prefix;
|
use crate::util::Prefix;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use std::fmt::{Debug, Display, Error, Formatter};
|
use std::fmt::{Debug, Display, Error, Formatter};
|
||||||
use std::rc::Rc;
|
|
||||||
|
|
||||||
use super::lookahead::*;
|
use super::lookahead::*;
|
||||||
|
|
||||||
@ -123,7 +122,7 @@ pub struct StateIndex(pub usize);
|
|||||||
|
|
||||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub struct Items<'grammar, L: Lookahead> {
|
pub struct Items<'grammar, L: Lookahead> {
|
||||||
pub vec: Rc<Vec<Item<'grammar, L>>>,
|
pub vec: Vec<Item<'grammar, L>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
//!
|
//!
|
||||||
|
|
||||||
use crate::collections::{Map, Set};
|
use crate::collections::{Map, Set};
|
||||||
use ena::unify::InPlaceUnificationTable;
|
|
||||||
use crate::grammar::repr::*;
|
use crate::grammar::repr::*;
|
||||||
use crate::lr1::build;
|
use crate::lr1::build;
|
||||||
use crate::lr1::core::*;
|
use crate::lr1::core::*;
|
||||||
@ -11,7 +10,7 @@ use crate::lr1::lane_table::table::context_set::OverlappingLookahead;
|
|||||||
use crate::lr1::lane_table::table::{ConflictIndex, LaneTable};
|
use crate::lr1::lane_table::table::{ConflictIndex, LaneTable};
|
||||||
use crate::lr1::lookahead::{Lookahead, TokenSet};
|
use crate::lr1::lookahead::{Lookahead, TokenSet};
|
||||||
use crate::lr1::state_graph::StateGraph;
|
use crate::lr1::state_graph::StateGraph;
|
||||||
use std::rc::Rc;
|
use ena::unify::InPlaceUnificationTable;
|
||||||
|
|
||||||
mod merge;
|
mod merge;
|
||||||
use self::merge::Merge;
|
use self::merge::Merge;
|
||||||
@ -120,9 +119,7 @@ impl<'grammar> LaneTableConstruct<'grammar> {
|
|||||||
.collect();
|
.collect();
|
||||||
State {
|
State {
|
||||||
index: s.index,
|
index: s.index,
|
||||||
items: Items {
|
items: Items { vec: items },
|
||||||
vec: Rc::new(items),
|
|
||||||
},
|
|
||||||
shifts: s.shifts,
|
shifts: s.shifts,
|
||||||
reductions,
|
reductions,
|
||||||
gotos: s.gotos,
|
gotos: s.gotos,
|
||||||
|
@ -28,11 +28,15 @@ pub fn build_states<'grammar>(
|
|||||||
grammar: &'grammar Grammar,
|
grammar: &'grammar Grammar,
|
||||||
start: NonterminalString,
|
start: NonterminalString,
|
||||||
) -> LR1Result<'grammar> {
|
) -> LR1Result<'grammar> {
|
||||||
if !grammar.algorithm.lalr {
|
let mut lr1_states = if !grammar.algorithm.lalr {
|
||||||
build::build_lr1_states(grammar, start)
|
build::build_lr1_states(grammar, start)?
|
||||||
} else {
|
} else {
|
||||||
build_lalr::build_lalr_states(grammar, start)
|
build_lalr::build_lalr_states(grammar, start)?
|
||||||
}
|
};
|
||||||
|
|
||||||
|
rewrite_state_indices(grammar, &mut lr1_states);
|
||||||
|
|
||||||
|
Ok(lr1_states)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn generate_report<'grammar, W: Write + 'grammar>(
|
pub fn generate_report<'grammar, W: Write + 'grammar>(
|
||||||
@ -41,3 +45,37 @@ pub fn generate_report<'grammar, W: Write + 'grammar>(
|
|||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
report::generate_report(out, lr1result)
|
report::generate_report(out, lr1result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// By packing all states which start a reduction we can generate a smaller goto table as any
|
||||||
|
/// states not starting a reduction will not need a row
|
||||||
|
fn rewrite_state_indices(grammar: &Grammar, states: &mut [core::LR1State]) {
|
||||||
|
let mut start_states = vec![false; states.len()];
|
||||||
|
for (index, state) in states.iter_mut().enumerate() {
|
||||||
|
debug_assert!(state.index.0 == index);
|
||||||
|
if grammar
|
||||||
|
.nonterminals
|
||||||
|
.keys()
|
||||||
|
.any(|nonterminal| state.gotos.get(&nonterminal).is_some())
|
||||||
|
{
|
||||||
|
start_states[index] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since the sort is stable and we put starting states first, the initial state is still 0
|
||||||
|
states.sort_by_key(|state| !start_states[state.index.0]);
|
||||||
|
|
||||||
|
let mut state_rewrite = vec![0; states.len()];
|
||||||
|
for (new_index, state) in states.iter_mut().enumerate() {
|
||||||
|
state_rewrite[state.index.0] = new_index;
|
||||||
|
state.index.0 = new_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
for state in states {
|
||||||
|
for goto in state.gotos.values_mut() {
|
||||||
|
goto.0 = state_rewrite[goto.0];
|
||||||
|
}
|
||||||
|
for shift in state.shifts.values_mut() {
|
||||||
|
shift.0 = state_rewrite[shift.0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user