wasm-utils/src/optimizer.rs

550 lines
21 KiB
Rust
Raw Normal View History

2017-05-04 16:08:57 +03:00
use std::collections::HashSet;
use parity_wasm::elements;
use symbols::{Symbol, expand_symbols, push_code_symbols, resolve_function};
#[derive(Debug)]
pub enum Error {
/// Since optimizer starts with export entries, export
/// section is supposed to exist.
NoExportSection,
}
2017-05-04 16:08:57 +03:00
pub fn optimize(
module: &mut elements::Module, // Module to optimize
2017-05-04 16:12:27 +03:00
used_exports: Vec<&str>, // List of only exports that will be usable after optimization
) -> Result<(), Error> {
2017-05-04 16:08:57 +03:00
// WebAssembly exports optimizer
// Motivation: emscripten compiler backend compiles in many unused exports
// which in turn compile in unused imports and leaves unused functions
// Algo starts from the top, listing all items that should stay
let mut stay = HashSet::new();
for (index, entry) in module.export_section().ok_or(Error::NoExportSection)?.entries().iter().enumerate() {
2017-05-04 16:08:57 +03:00
if used_exports.iter().find(|e| **e == entry.field()).is_some() {
stay.insert(Symbol::Export(index));
}
}
// All symbols used in data/element segments are also should be preserved
let mut init_symbols = Vec::new();
if let Some(data_section) = module.data_section() {
for segment in data_section.entries() {
push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
}
}
if let Some(elements_section) = module.elements_section() {
for segment in elements_section.entries() {
push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
for func_index in segment.members() {
stay.insert(resolve_function(&module, *func_index));
}
}
}
for symbol in init_symbols.drain(..) { stay.insert(symbol); }
// Call function which will traverse the list recursively, filling stay with all symbols
// that are already used by those which already there
expand_symbols(module, &mut stay);
for symbol in stay.iter() {
2017-05-05 15:51:08 +03:00
trace!("symbol to stay: {:?}", symbol);
2017-05-04 16:08:57 +03:00
}
// Keep track of referreable symbols to rewire calls/globals
let mut eliminated_funcs = Vec::new();
let mut eliminated_globals = Vec::new();
let mut eliminated_types = Vec::new();
// First, iterate through types
let mut index = 0;
let mut old_index = 0;
{
loop {
2017-05-09 13:04:40 +03:00
if type_section(module).map(|section| section.types_mut().len()).unwrap_or(0) == index { break; }
2017-05-04 16:08:57 +03:00
if stay.contains(&Symbol::Type(old_index)) {
index += 1;
} else {
2017-05-09 13:04:40 +03:00
type_section(module)
.expect("If type section does not exists, the loop will break at the beginning of first iteration")
.types_mut().remove(index);
2017-05-04 16:08:57 +03:00
eliminated_types.push(old_index);
2017-05-05 15:51:08 +03:00
trace!("Eliminated type({})", old_index);
2017-05-04 16:08:57 +03:00
}
old_index += 1;
}
}
// Second, iterate through imports
let mut top_funcs = 0;
let mut top_globals = 0;
2017-05-09 13:04:40 +03:00
index = 0;
old_index = 0;
2017-05-04 16:08:57 +03:00
2017-05-09 13:04:40 +03:00
if let Some(imports) = import_section(module) {
2017-05-04 16:08:57 +03:00
loop {
let mut remove = false;
match imports.entries()[index].external() {
&elements::External::Function(_) => {
if stay.contains(&Symbol::Import(old_index)) {
index += 1;
} else {
remove = true;
eliminated_funcs.push(top_funcs);
2017-05-05 15:51:08 +03:00
trace!("Eliminated import({}) func({}, {})", old_index, top_funcs, imports.entries()[index].field());
2017-05-04 16:08:57 +03:00
}
top_funcs += 1;
},
&elements::External::Global(_) => {
if stay.contains(&Symbol::Import(old_index)) {
index += 1;
} else {
remove = true;
eliminated_globals.push(top_globals);
2017-05-05 15:51:08 +03:00
trace!("Eliminated import({}) global({}, {})", old_index, top_globals, imports.entries()[index].field());
2017-05-04 16:08:57 +03:00
}
top_globals += 1;
},
_ => {
index += 1;
}
}
if remove {
imports.entries_mut().remove(index);
}
old_index += 1;
if index == imports.entries().len() { break; }
}
}
// Third, iterate through globals
2017-05-09 13:04:40 +03:00
if let Some(globals) = global_section(module) {
2017-05-04 16:08:57 +03:00
index = 0;
old_index = 0;
loop {
if globals.entries_mut().len() == index { break; }
if stay.contains(&Symbol::Global(old_index)) {
index += 1;
} else {
globals.entries_mut().remove(index);
eliminated_globals.push(top_globals + old_index);
2017-05-05 15:51:08 +03:00
trace!("Eliminated global({})", top_globals + old_index);
2017-05-04 16:08:57 +03:00
}
old_index += 1;
}
}
// Forth, delete orphaned functions
2017-05-09 13:04:40 +03:00
if functions_section(module).is_some() && code_section(module).is_some() {
index = 0;
old_index = 0;
2017-05-04 16:08:57 +03:00
2017-05-09 13:04:40 +03:00
loop {
if functions_section(module).expect("Functons section to exist").entries_mut().len() == index { break; }
if stay.contains(&Symbol::Function(old_index)) {
index += 1;
} else {
functions_section(module).expect("Functons section to exist").entries_mut().remove(index);
code_section(module).expect("Code section to exist").bodies_mut().remove(index);
2017-05-04 16:08:57 +03:00
2017-05-09 13:04:40 +03:00
eliminated_funcs.push(top_funcs + old_index);
trace!("Eliminated function({})", top_funcs + old_index);
}
old_index += 1;
2017-05-04 16:08:57 +03:00
}
}
2017-05-09 13:04:40 +03:00
// Fifth, eliminate unused exports
2017-05-04 16:08:57 +03:00
{
2017-05-09 13:04:40 +03:00
let exports = export_section(module).ok_or(Error::NoExportSection)?;
2017-05-04 16:08:57 +03:00
index = 0;
old_index = 0;
loop {
if exports.entries_mut().len() == index { break; }
if stay.contains(&Symbol::Export(old_index)) {
index += 1;
} else {
2017-05-05 15:51:08 +03:00
trace!("Eliminated export({}, {})", old_index, exports.entries_mut()[index].field());
2017-05-04 16:08:57 +03:00
exports.entries_mut().remove(index);
}
old_index += 1;
}
}
if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 || eliminated_types.len() > 0 {
// Finaly, rewire all calls, globals references and types to the new indices
// (only if there is anything to do)
eliminated_globals.sort();
eliminated_funcs.sort();
eliminated_types.sort();
for section in module.sections_mut() {
match section {
2017-05-09 13:08:15 +03:00
&mut elements::Section::Function(ref mut function_section) if eliminated_types.len() > 0 => {
2017-05-04 16:08:57 +03:00
for ref mut func_signature in function_section.entries_mut() {
let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < func_signature.type_ref()).count();
*func_signature.type_ref_mut() -= totalle as u32;
}
},
2017-05-09 13:08:15 +03:00
&mut elements::Section::Import(ref mut import_section) if eliminated_types.len() > 0 => {
2017-05-04 16:08:57 +03:00
for ref mut import_entry in import_section.entries_mut() {
if let &mut elements::External::Function(ref mut type_ref) = import_entry.external_mut() {
let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < *type_ref).count();
*type_ref -= totalle as u32;
}
}
},
2017-05-09 13:08:15 +03:00
&mut elements::Section::Code(ref mut code_section) if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 => {
2017-05-04 16:08:57 +03:00
for ref mut func_body in code_section.bodies_mut() {
2017-05-09 13:08:15 +03:00
if eliminated_funcs.len() > 0 {
update_call_index(func_body.code_mut(), &eliminated_funcs);
}
if eliminated_globals.len() > 0 {
update_global_index(func_body.code_mut().elements_mut(), &eliminated_globals)
}
2017-05-04 16:08:57 +03:00
}
},
&mut elements::Section::Export(ref mut export_section) => {
for ref mut export in export_section.entries_mut() {
match export.internal_mut() {
&mut elements::Internal::Function(ref mut func_index) => {
let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
*func_index -= totalle as u32;
},
&mut elements::Internal::Global(ref mut global_index) => {
let totalle = eliminated_globals.iter().take_while(|i| (**i as u32) < *global_index).count();
*global_index -= totalle as u32;
},
_ => {}
}
}
},
&mut elements::Section::Global(ref mut global_section) => {
for ref mut global_entry in global_section.entries_mut() {
update_global_index(global_entry.init_expr_mut().code_mut(), &eliminated_globals)
}
},
&mut elements::Section::Data(ref mut data_section) => {
for ref mut segment in data_section.entries_mut() {
update_global_index(segment.offset_mut().code_mut(), &eliminated_globals)
}
},
&mut elements::Section::Element(ref mut elements_section) => {
for ref mut segment in elements_section.entries_mut() {
update_global_index(segment.offset_mut().code_mut(), &eliminated_globals);
// update all indirect call addresses initial values
for func_index in segment.members_mut() {
let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
*func_index -= totalle as u32;
}
}
},
_ => { }
}
}
}
Ok(())
2017-05-04 16:08:57 +03:00
}
pub fn update_call_index(opcodes: &mut elements::Opcodes, eliminated_indices: &[usize]) {
use parity_wasm::elements::Opcode::*;
for opcode in opcodes.elements_mut().iter_mut() {
match opcode {
&mut Block(_, ref mut block) | &mut If(_, ref mut block) | &mut Loop(_, ref mut block) => {
update_call_index(block, eliminated_indices)
},
&mut Call(ref mut call_index) => {
let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
2017-05-05 15:51:08 +03:00
trace!("rewired call {} -> call {}", *call_index, *call_index - totalle as u32);
2017-05-04 16:08:57 +03:00
*call_index -= totalle as u32;
},
_ => { },
}
}
}
/// Updates global references considering the _ordered_ list of eliminated indices
pub fn update_global_index(opcodes: &mut Vec<elements::Opcode>, eliminated_indices: &[usize]) {
use parity_wasm::elements::Opcode::*;
for opcode in opcodes.iter_mut() {
match opcode {
&mut Block(_, ref mut block) | &mut If(_, ref mut block) | &mut Loop(_, ref mut block) => {
update_global_index(block.elements_mut(), eliminated_indices)
},
&mut GetGlobal(ref mut index) | &mut SetGlobal(ref mut index) => {
let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *index).count();
2017-05-05 15:51:08 +03:00
trace!("rewired global {} -> global {}", *index, *index - totalle as u32);
2017-05-04 16:08:57 +03:00
*index -= totalle as u32;
},
_ => { },
}
}
}
pub fn import_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ImportSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Import(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
}
pub fn global_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::GlobalSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Global(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
}
pub fn functions_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::FunctionsSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Function(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
}
pub fn code_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::CodeSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Code(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
}
pub fn export_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ExportSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Export(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
}
pub fn type_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::TypeSection> {
for section in module.sections_mut() {
match section {
&mut elements::Section::Type(ref mut sect) => {
return Some(sect);
},
_ => { }
}
}
None
2017-05-09 12:46:49 +03:00
}
#[cfg(test)]
mod tests {
2017-05-09 18:06:06 +03:00
use parity_wasm::{builder, elements};
2017-05-09 12:46:49 +03:00
use super::*;
2017-05-09 18:06:06 +03:00
/// @spec 0
/// Optimizer presumes that export section exists and contains
/// all symbols passed as a second parameter. Since empty module
/// obviously contains no export section, optimizer should return
/// error on it.
2017-05-09 12:46:49 +03:00
#[test]
fn empty() {
let mut module = builder::module().build();
let result = optimize(&mut module, vec!["_call"]);
2017-05-09 12:46:49 +03:00
assert!(result.is_err());
2017-05-09 12:46:49 +03:00
}
2017-05-09 15:35:49 +03:00
2017-05-09 18:06:06 +03:00
/// @spec 1
/// Imagine the unoptimized module has two own functions, `_call` and `_random`
/// and exports both of them in the export section. During optimization, the `_random`
/// function should vanish completely, given we pass `_call` as the only function to stay
/// in the module.
2017-05-09 15:35:49 +03:00
#[test]
fn minimal() {
let mut module = builder::module()
.function()
.signature().param().i32().build()
.build()
.function()
2017-05-09 18:06:06 +03:00
.signature()
.param().i32()
.param().i32()
.build()
2017-05-09 15:35:49 +03:00
.build()
.export()
.field("_call")
.internal().func(0).build()
.export()
.field("_random")
.internal().func(1).build()
.build();
assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
assert_eq!(
1,
module.export_section().expect("export section to be generated").entries().len(),
"There should only 1 (one) export entry in the optimized module"
2017-05-09 18:16:08 +03:00
);
assert_eq!(
1,
module.functions_section().expect("functions section to be generated").entries().len(),
"There should 2 (two) functions in the optimized module"
2017-05-09 15:35:49 +03:00
);
}
2017-05-09 18:06:06 +03:00
/// @spec 2
/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
/// to stay during the optimization. The code of this function uses global during the execution.
/// This sayed global should survive the optimization.
#[test]
fn globals() {
let mut module = builder::module()
.global()
.value_type().i32()
.build()
.function()
.signature().param().i32().build()
.body()
.with_opcodes(elements::Opcodes::new(
vec![
elements::Opcode::GetGlobal(0),
elements::Opcode::End
]
))
.build()
.build()
.export()
.field("_call")
.internal().func(0).build()
.build();
optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
assert_eq!(
1,
module.global_section().expect("global section to be generated").entries().len(),
"There should 1 (one) global entry in the optimized module, since _call function uses it"
);
}
2017-05-09 18:10:24 +03:00
/// @spec 2
/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
/// to stay during the optimization. The code of this function uses one global during the execution,
/// but we have a bunch of other unused globals in the code. Last globals should not survive the optimization,
/// while the former should.
#[test]
fn globals_2() {
let mut module = builder::module()
.global()
.value_type().i32()
.build()
.global()
.value_type().i64()
.build()
.global()
.value_type().f32()
.build()
.function()
.signature().param().i32().build()
.body()
.with_opcodes(elements::Opcodes::new(
vec![
elements::Opcode::GetGlobal(1),
elements::Opcode::End
]
))
.build()
.build()
.export()
.field("_call")
.internal().func(0).build()
.build();
optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
assert_eq!(
1,
module.global_section().expect("global section to be generated").entries().len(),
"There should 1 (one) global entry in the optimized module, since _call function uses only one"
);
}
2017-05-09 18:16:08 +03:00
/// @spec 3
/// Imagine the unoptimized module has two own functions, `_call` and `_random`
/// and exports both of them in the export section. Function `_call` also calls `_random`
/// in its function body. The optimization should kick `_random` function from the export section
/// but preserve it's body.
#[test]
fn call_ref() {
let mut module = builder::module()
.function()
.signature().param().i32().build()
.body()
.with_opcodes(elements::Opcodes::new(
vec![
elements::Opcode::Call(1),
elements::Opcode::End
]
))
.build()
.build()
.function()
.signature()
.param().i32()
.param().i32()
.build()
.build()
.export()
.field("_call")
.internal().func(0).build()
.export()
.field("_random")
.internal().func(1).build()
.build();
assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
assert_eq!(
1,
module.export_section().expect("export section to be generated").entries().len(),
"There should only 1 (one) export entry in the optimized module"
);
assert_eq!(
2,
module.functions_section().expect("functions section to be generated").entries().len(),
"There should 2 (two) functions in the optimized module"
);
}
2017-05-04 16:08:57 +03:00
}