Move out parser and codegen traits.

This commit is contained in:
losfair
2019-04-27 12:31:04 +08:00
parent 9f8bbb72cb
commit eca8ccdbd4
7 changed files with 304 additions and 147 deletions

View File

@ -0,0 +1,240 @@
use crate::{
backend::RunnableModule,
structures::Map,
types::{FuncIndex, FuncSig, SigIndex},
backend::{sys::Memory, Backend, CacheGen, Compiler, CompilerConfig, Token},
cache::{Artifact, Error as CacheError},
error::{CompileError, CompileResult},
module::{ModuleInfo, ModuleInner},
parse::LoadError,
};
use wasmparser::{Operator, Type as WpType};
use std::fmt::Debug;
use smallvec::SmallVec;
use std::marker::PhantomData;
/// An event delivered to middlewares and, after them, to the function code generator.
///
/// `'a` is the lifetime parameter of the wrapped wasmparser `Operator`; `'b` is the
/// lifetime of the borrow of that operator.
pub enum Event<'a, 'b> {
    /// An event that does not originate from a WebAssembly operator.
    Internal(InternalEvent),
    /// A borrowed WebAssembly operator.
    Wasm(&'b Operator<'a>),
}
/// Events that are not WebAssembly operators.
#[derive(Clone, Copy, Debug)]
pub enum InternalEvent {
    /// Marks the start of a function.
    FunctionBegin,
    /// Marks the end of a function.
    FunctionEnd,
    /// NOTE(review): exact semantics are not established by the code next to this
    /// definition — presumably a tracing hook; confirm with the backends that consume it.
    Trace,
}
/// The module-scope code generator trait.
///
/// * `FCG` — the function-scope code generator handed out by `next_function`.
/// * `RM` — the runnable module produced by `finalize`.
/// * `E` — the error type reported by every fallible operation.
pub trait ModuleCodeGenerator<FCG, RM, E>
where
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule,
    E: Debug,
{
    /// Creates a new module code generator.
    fn new() -> Self;

    /// Returns the `Backend` identifier of this code generator.
    fn backend_id() -> Backend;

    /// Checks the given module information before code generation proceeds.
    ///
    /// NOTE(review): which preconditions are checked is backend-specific — confirm per implementor.
    fn check_precondition(&mut self, module_info: &ModuleInfo) -> Result<(), E>;

    /// Creates a new function and returns the function-scope code generator for it.
    fn next_function(&mut self) -> Result<&mut FCG, E>;

    /// Consumes the generator and produces the runnable module.
    fn finalize(self, module_info: &ModuleInfo) -> Result<RM, E>;

    /// Sets the table of signatures, indexed by `SigIndex`.
    fn feed_signatures(&mut self, signatures: Map<SigIndex, FuncSig>) -> Result<(), E>;

    /// Sets function signatures, as a mapping from function index to signature index.
    fn feed_function_signatures(&mut self, assoc: Map<FuncIndex, SigIndex>) -> Result<(), E>;

    /// Adds an import function.
    fn feed_import_function(&mut self) -> Result<(), E>;
}
/// A compiler that drives a `ModuleCodeGenerator` and runs every event through a
/// middleware chain obtained from `CGEN`.
///
/// The `PhantomData` fields only tie the otherwise unused type parameters to the struct.
pub struct StreamingCompiler<MCG, FCG, RM, E, CGEN>
where
    MCG: ModuleCodeGenerator<FCG, RM, E>,
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule + 'static,
    E: Debug,
    CGEN: Fn() -> MiddlewareChain,
{
    /// Factory producing the middleware chain.
    middleware_chain_generator: CGEN,
    _phantom_mcg: PhantomData<MCG>,
    _phantom_fcg: PhantomData<FCG>,
    _phantom_rm: PhantomData<RM>,
    _phantom_e: PhantomData<E>,
}
/// Helper type whose `new` builds a `StreamingCompiler` with an empty middleware chain.
///
/// The `PhantomData` fields only tie the type parameters to the struct.
pub struct SimpleStreamingCompilerGen<MCG, FCG, RM, E>
where
    MCG: ModuleCodeGenerator<FCG, RM, E>,
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule + 'static,
    E: Debug,
{
    _phantom_mcg: PhantomData<MCG>,
    _phantom_fcg: PhantomData<FCG>,
    _phantom_rm: PhantomData<RM>,
    _phantom_e: PhantomData<E>,
}
impl<MCG, FCG, RM, E> SimpleStreamingCompilerGen<MCG, FCG, RM, E>
where
    MCG: ModuleCodeGenerator<FCG, RM, E>,
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule + 'static,
    E: Debug,
{
    /// Builds a `StreamingCompiler` whose chain generator yields a fresh, empty
    /// `MiddlewareChain` on every call.
    pub fn new() -> StreamingCompiler<MCG, FCG, RM, E, impl Fn() -> MiddlewareChain> {
        StreamingCompiler::new(MiddlewareChain::new)
    }
}
impl<
MCG: ModuleCodeGenerator<FCG, RM, E>,
FCG: FunctionCodeGenerator<E>,
RM: RunnableModule + 'static,
E: Debug,
CGEN: Fn() -> MiddlewareChain,
> StreamingCompiler<MCG, FCG, RM, E, CGEN> {
pub fn new(chain_gen: CGEN) -> Self {
Self {
middleware_chain_generator: chain_gen,
_phantom_mcg: PhantomData,
_phantom_fcg: PhantomData,
_phantom_rm: PhantomData,
_phantom_e: PhantomData,
}
}
}
impl<MCG, FCG, RM, E, CGEN> Compiler for StreamingCompiler<MCG, FCG, RM, E, CGEN>
where
    MCG: ModuleCodeGenerator<FCG, RM, E>,
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule + 'static,
    E: Debug,
    CGEN: Fn() -> MiddlewareChain,
{
    /// Compiles `wasm` by streaming it through a fresh middleware chain into a fresh `MCG`.
    ///
    /// Errors from `read_module` are converted with `?`; an error from `MCG::finalize`
    /// is Debug-formatted into `CompileError::InternalError`.
    fn compile(
        &self,
        wasm: &[u8],
        compiler_config: CompilerConfig,
        _: Token,
    ) -> CompileResult<ModuleInner> {
        /// Cache generator that always reports caching as unsupported.
        struct NoCache;

        impl CacheGen for NoCache {
            fn generate_cache(&self) -> Result<(Box<[u8]>, Memory), CacheError> {
                Err(CacheError::Unknown(
                    "the singlepass backend doesn't support caching yet".to_string(),
                ))
            }
        }

        let mut module_codegen = MCG::new();
        let mut middlewares = (self.middleware_chain_generator)();

        let info = crate::parse::read_module(
            wasm,
            MCG::backend_id(),
            &mut module_codegen,
            &mut middlewares,
            &compiler_config,
        )?;

        let runnable = module_codegen
            .finalize(&info)
            .map_err(|err| CompileError::InternalError {
                msg: format!("{:?}", err),
            })?;

        Ok(ModuleInner {
            cache_gen: Box::new(NoCache),
            runnable_module: Box::new(runnable),
            info,
        })
    }

    /// Always fails: loading from a cache artifact is not supported.
    unsafe fn from_cache(&self, _artifact: Artifact, _: Token) -> Result<ModuleInner, CacheError> {
        Err(CacheError::Unknown(
            "the singlepass backend doesn't support caching yet".to_string(),
        ))
    }
}
/// Collects the events a middleware emits.
pub struct EventSink<'a, 'b> {
    /// Pending events; up to two are stored inline before the buffer spills to the heap.
    buffer: SmallVec<[Event<'a, 'b>; 2]>,
}
impl<'a, 'b> EventSink<'a, 'b> {
    /// Appends `ev` to the end of the sink's buffer.
    pub fn push(&mut self, ev: Event<'a, 'b>) {
        let pending = &mut self.buffer;
        pending.push(ev);
    }
}
pub struct MiddlewareChain {
chain: Vec<Box<GenericFunctionMiddleware>>,
}
impl MiddlewareChain {
    /// Creates an empty chain.
    pub fn new() -> MiddlewareChain {
        MiddlewareChain { chain: Vec::new() }
    }

    /// Appends a middleware to the end of the chain.
    pub fn push<M: FunctionMiddleware + 'static>(&mut self, m: M) {
        let boxed = Box::new(m);
        self.chain.push(boxed);
    }

    /// Runs `ev` through every middleware in order, then forwards whatever events remain
    /// to `fcg` (if one is given).
    ///
    /// Each middleware receives all events emitted by the previous stage and writes its
    /// own output into the sink. Errors are reported as Debug-formatted strings.
    pub(crate) fn run<E: Debug, FCG: FunctionCodeGenerator<E>>(
        &mut self,
        fcg: Option<&mut FCG>,
        ev: Event,
        module_info: &ModuleInfo,
    ) -> Result<(), String> {
        let mut sink = EventSink {
            buffer: SmallVec::new(),
        };
        sink.buffer.push(ev);

        for middleware in self.chain.iter_mut() {
            // Take the previous stage's output so the middleware can refill the sink.
            let inputs: SmallVec<[Event; 2]> = sink.buffer.drain().collect();
            for input in inputs {
                middleware.feed_event(input, module_info, &mut sink)?;
            }
        }

        let generator = match fcg {
            Some(generator) => generator,
            None => return Ok(()),
        };
        for output in sink.buffer {
            generator
                .feed_event(output, module_info)
                .map_err(|err| format!("{:?}", err))?;
        }
        Ok(())
    }
}
/// A middleware that observes function events and emits events into a sink.
pub trait FunctionMiddleware {
    /// Error type reported by this middleware.
    type Error: Debug;

    /// Handles one event.
    ///
    /// Anything the middleware wants to pass on must be pushed into `sink`.
    fn feed_event(
        &mut self,
        op: Event,
        module_info: &ModuleInfo,
        sink: &mut EventSink,
    ) -> Result<(), Self::Error>;
}
/// Variant of `FunctionMiddleware` whose error type is fixed to `String`, so that
/// middlewares with different error types can be stored behind one trait object.
pub(crate) trait GenericFunctionMiddleware {
    /// Handles one event, reporting failure as a `String`.
    fn feed_event(
        &mut self,
        op: Event,
        module_info: &ModuleInfo,
        sink: &mut EventSink,
    ) -> Result<(), String>;
}
impl<E: Debug, T: FunctionMiddleware<Error = E>> GenericFunctionMiddleware for T {
fn feed_event(
&mut self,
op: Event,
module_info: &ModuleInfo,
sink: &mut EventSink,
) -> Result<(), String> {
<Self as FunctionMiddleware>::feed_event(self, op, module_info, sink).map_err(|x| format!("{:?}", x))
}
}
/// The function-scope code generator trait.
///
/// `E` is the error type reported by every operation.
pub trait FunctionCodeGenerator<E: Debug> {
    /// Sets the return type.
    fn feed_return(&mut self, ty: WpType) -> Result<(), E>;

    /// Adds a parameter to the function.
    fn feed_param(&mut self, ty: WpType) -> Result<(), E>;

    /// Adds `n` locals of type `ty` to the function.
    fn feed_local(&mut self, ty: WpType, n: usize) -> Result<(), E>;

    /// Called before the first operator is passed to `feed_event`.
    fn begin_body(&mut self) -> Result<(), E>;

    /// Called for each event, i.e. each operator or internal event.
    fn feed_event(&mut self, op: Event, module_info: &ModuleInfo) -> Result<(), E>;

    /// Finalizes the function.
    fn finalize(&mut self) -> Result<(), E>;
}

View File

@ -31,6 +31,8 @@ pub mod units;
pub mod vm;
#[doc(hidden)]
pub mod vmcalls;
pub mod codegen;
pub mod parse;
use self::error::CompileResult;
#[doc(inline)]

View File

@ -0,0 +1,402 @@
use crate::codegen::*;
use hashbrown::HashMap;
use crate::{
backend::{Backend, CompilerConfig, RunnableModule},
module::{
DataInitializer, ExportIndex, ImportName, ModuleInfo, StringTable, StringTableBuilder,
TableInitializer,
},
structures::{Map, TypedIndex},
types::{
ElementType, FuncIndex, FuncSig, GlobalDescriptor, GlobalIndex, GlobalInit,
ImportedGlobalIndex, Initializer, MemoryDescriptor, MemoryIndex, SigIndex, TableDescriptor,
TableIndex, Type, Value,
},
units::Pages,
error::CompileError,
};
use wasmparser::{
BinaryReaderError, Data, DataKind, Element, ElementKind, Export, ExternalKind, FuncType,
Import, ImportSectionEntryType, InitExpr, ModuleReader, Operator, SectionCode, Type as WpType,
WasmDecoder,
};
use std::fmt::Debug;
/// Errors produced while loading a WebAssembly module.
#[derive(Debug)]
pub enum LoadError {
    /// The wasmparser reader reported an error.
    Parse(BinaryReaderError),
    /// A code generator or middleware failed; carries the formatted error text.
    Codegen(String),
}
/// Wraps a `LoadError` as `CompileError::InternalError`, using its Debug output as the message.
impl From<LoadError> for CompileError {
    fn from(err: LoadError) -> CompileError {
        let msg = format!("{:?}", err);
        CompileError::InternalError { msg }
    }
}
/// Lets `?` turn a wasmparser `BinaryReaderError` into `LoadError::Parse`.
impl From<BinaryReaderError> for LoadError {
    fn from(err: BinaryReaderError) -> LoadError {
        LoadError::Parse(err)
    }
}
/// Parses and validates a WebAssembly binary, collecting its `ModuleInfo` and streaming
/// every function body through `middlewares` into the code generator `mcg`.
///
/// # Parameters
///
/// * `wasm` — the raw module bytes.
/// * `backend` — stored verbatim in the returned `ModuleInfo`.
/// * `mcg` — module code generator; receives import notifications, the signature tables
///   (once, right before the first function body) and one `next_function` call per body.
/// * `middlewares` — chain every function event is run through before reaching the
///   function code generator.
/// * `compiler_config` — only `symbol_map` is read (cloned into `em_symbol_map`).
///
/// # Errors
///
/// * `LoadError::Parse` when the validating parser reports an error or a type/init
///   expression cannot be converted.
/// * `LoadError::Codegen` when the code generator or a middleware fails; the error is
///   carried as a Debug-formatted string.
///
/// # Panics
///
/// Panics if an imported table's element type is not `AnyFunc`, if an element or data
/// entry ends without an init expression (or, for elements, without a body), if a
/// function body refers to a function or signature index that was never declared, or if
/// the parser yields a state that is not expected at that point (`unreachable!`).
pub fn read_module<
    MCG: ModuleCodeGenerator<FCG, RM, E>,
    FCG: FunctionCodeGenerator<E>,
    RM: RunnableModule,
    E: Debug,
>(
    wasm: &[u8],
    backend: Backend,
    mcg: &mut MCG,
    middlewares: &mut MiddlewareChain,
    compiler_config: &CompilerConfig,
) -> Result<ModuleInfo, LoadError> {
    let mut info = ModuleInfo {
        memories: Map::new(),
        globals: Map::new(),
        tables: Map::new(),

        imported_functions: Map::new(),
        imported_memories: Map::new(),
        imported_tables: Map::new(),
        imported_globals: Map::new(),

        exports: Default::default(),

        data_initializers: Vec::new(),
        elem_initializers: Vec::new(),

        start_func: None,

        func_assoc: Map::new(),
        signatures: Map::new(),
        backend: backend,

        namespace_table: StringTable::new(),
        name_table: StringTable::new(),

        em_symbol_map: compiler_config.symbol_map.clone(),

        custom_sections: HashMap::new(),
    };

    // All optional proposals are disabled in the validator.
    let mut parser = wasmparser::ValidatingParser::new(
        wasm,
        Some(wasmparser::ValidatingParserConfig {
            operator_config: wasmparser::OperatorValidatorConfig {
                enable_threads: false,
                enable_reference_types: false,
                enable_simd: false,
                enable_bulk_memory: false,
            },
            mutable_global_imports: false,
        }),
    );

    // The builders are `Option`s so they can be consumed exactly once when finished.
    let mut namespace_builder = Some(StringTableBuilder::new());
    let mut name_builder = Some(StringTableBuilder::new());

    // Starts at MAX so that the first `wrapping_add(1)` yields index 0.
    let mut func_count: usize = ::std::usize::MAX;

    loop {
        use wasmparser::ParserState;
        let state = parser.read();
        match *state {
            ParserState::EndWasm => {
                // A module without any function body never reaches the code below that
                // finishes the string tables; finish them here so that import names
                // registered earlier are not lost.
                if let Some(builder) = namespace_builder.take() {
                    info.namespace_table = builder.finish();
                }
                if let Some(builder) = name_builder.take() {
                    info.name_table = builder.finish();
                }
                break Ok(info);
            }
            ParserState::Error(err) => Err(LoadError::Parse(err))?,
            ParserState::TypeSectionEntry(ref ty) => {
                info.signatures.push(func_type_to_func_sig(ty)?);
            }
            ParserState::ImportSectionEntry { module, field, ty } => {
                let namespace_index = namespace_builder.as_mut().unwrap().register(module);
                let name_index = name_builder.as_mut().unwrap().register(field);
                let import_name = ImportName {
                    namespace_index,
                    name_index,
                };

                match ty {
                    ImportSectionEntryType::Function(sigindex) => {
                        let sigindex = SigIndex::new(sigindex as usize);
                        info.imported_functions.push(import_name);
                        info.func_assoc.push(sigindex);
                        mcg.feed_import_function()
                            .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                    }
                    ImportSectionEntryType::Table(table_ty) => {
                        assert_eq!(table_ty.element_type, WpType::AnyFunc);
                        let table_desc = TableDescriptor {
                            element: ElementType::Anyfunc,
                            minimum: table_ty.limits.initial,
                            maximum: table_ty.limits.maximum,
                        };

                        info.imported_tables.push((import_name, table_desc));
                    }
                    ImportSectionEntryType::Memory(memory_ty) => {
                        let mem_desc = MemoryDescriptor {
                            minimum: Pages(memory_ty.limits.initial),
                            maximum: memory_ty.limits.maximum.map(|max| Pages(max)),
                            shared: memory_ty.shared,
                        };
                        info.imported_memories.push((import_name, mem_desc));
                    }
                    ImportSectionEntryType::Global(global_ty) => {
                        let global_desc = GlobalDescriptor {
                            mutable: global_ty.mutable,
                            ty: wp_type_to_type(global_ty.content_type)?,
                        };
                        info.imported_globals.push((import_name, global_desc));
                    }
                }
            }
            ParserState::FunctionSectionEntry(sigindex) => {
                let sigindex = SigIndex::new(sigindex as usize);
                info.func_assoc.push(sigindex);
            }
            ParserState::TableSectionEntry(table_ty) => {
                let table_desc = TableDescriptor {
                    element: ElementType::Anyfunc,
                    minimum: table_ty.limits.initial,
                    maximum: table_ty.limits.maximum,
                };

                info.tables.push(table_desc);
            }
            ParserState::MemorySectionEntry(memory_ty) => {
                let mem_desc = MemoryDescriptor {
                    minimum: Pages(memory_ty.limits.initial),
                    maximum: memory_ty.limits.maximum.map(|max| Pages(max)),
                    shared: memory_ty.shared,
                };

                info.memories.push(mem_desc);
            }
            ParserState::ExportSectionEntry { field, kind, index } => {
                let export_index = match kind {
                    ExternalKind::Function => ExportIndex::Func(FuncIndex::new(index as usize)),
                    ExternalKind::Table => ExportIndex::Table(TableIndex::new(index as usize)),
                    ExternalKind::Memory => ExportIndex::Memory(MemoryIndex::new(index as usize)),
                    ExternalKind::Global => ExportIndex::Global(GlobalIndex::new(index as usize)),
                };

                info.exports.insert(field.to_string(), export_index);
            }
            ParserState::StartSectionEntry(start_index) => {
                info.start_func = Some(FuncIndex::new(start_index as usize));
            }
            ParserState::BeginFunctionBody { .. } => {
                let id = func_count.wrapping_add(1);
                func_count = id;
                if func_count == 0 {
                    // First function body: finish the import string tables and hand the
                    // collected signature tables to the code generator.
                    info.namespace_table = namespace_builder.take().unwrap().finish();
                    info.name_table = name_builder.take().unwrap().finish();
                    mcg.feed_signatures(info.signatures.clone())
                        .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                    mcg.feed_function_signatures(info.func_assoc.clone())
                        .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                    mcg.check_precondition(&info)
                        .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                }

                let fcg = mcg
                    .next_function()
                    .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                middlewares
                    .run(
                        Some(fcg),
                        Event::Internal(InternalEvent::FunctionBegin),
                        &info,
                    )
                    .map_err(|x| LoadError::Codegen(x))?;

                // `id` counts local functions only; `func_assoc` lists imported functions
                // first, so the import count is added to obtain the function index.
                let sig = info
                    .signatures
                    .get(
                        *info
                            .func_assoc
                            .get(FuncIndex::new(id as usize + info.imported_functions.len()))
                            .unwrap(),
                    )
                    .unwrap();
                for ret in sig.returns() {
                    fcg.feed_return(type_to_wp_type(*ret))
                        .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                }
                for param in sig.params() {
                    fcg.feed_param(type_to_wp_type(*param))
                        .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                }

                let mut body_begun = false;

                loop {
                    let state = parser.read();
                    match *state {
                        ParserState::Error(err) => return Err(LoadError::Parse(err)),
                        ParserState::FunctionBodyLocals { ref locals } => {
                            for &(count, ty) in locals.iter() {
                                fcg.feed_local(ty, count as usize)
                                    .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                            }
                        }
                        ParserState::CodeOperator(ref op) => {
                            // `begin_body` is issued lazily, right before the first operator.
                            if !body_begun {
                                body_begun = true;
                                fcg.begin_body()
                                    .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
                            }
                            middlewares
                                .run(Some(fcg), Event::Wasm(op), &info)
                                .map_err(|x| LoadError::Codegen(x))?;
                        }
                        ParserState::EndFunctionBody => break,
                        _ => unreachable!(),
                    }
                }
                middlewares
                    .run(
                        Some(fcg),
                        Event::Internal(InternalEvent::FunctionEnd),
                        &info,
                    )
                    .map_err(|x| LoadError::Codegen(x))?;
                fcg.finalize()
                    .map_err(|x| LoadError::Codegen(format!("{:?}", x)))?;
            }
            ParserState::BeginActiveElementSectionEntry(table_index) => {
                let table_index = TableIndex::new(table_index as usize);
                let mut elements: Option<Vec<FuncIndex>> = None;
                let mut base: Option<Initializer> = None;

                loop {
                    let state = parser.read();
                    match *state {
                        ParserState::Error(err) => return Err(LoadError::Parse(err)),
                        ParserState::InitExpressionOperator(ref op) => {
                            base = Some(eval_init_expr(op)?)
                        }
                        ParserState::ElementSectionEntryBody(ref _elements) => {
                            elements = Some(
                                _elements
                                    .iter()
                                    .cloned()
                                    .map(|index| FuncIndex::new(index as usize))
                                    .collect(),
                            );
                        }
                        ParserState::BeginInitExpressionBody
                        | ParserState::EndInitExpressionBody => {}
                        ParserState::EndElementSectionEntry => break,
                        _ => unreachable!(),
                    }
                }

                let table_init = TableInitializer {
                    table_index,
                    base: base.unwrap(),
                    elements: elements.unwrap(),
                };

                info.elem_initializers.push(table_init);
            }
            ParserState::BeginActiveDataSectionEntry(memory_index) => {
                let memory_index = MemoryIndex::new(memory_index as usize);
                let mut base: Option<Initializer> = None;
                let mut data: Vec<u8> = vec![];

                loop {
                    let state = parser.read();
                    match *state {
                        ParserState::Error(err) => return Err(LoadError::Parse(err)),
                        ParserState::InitExpressionOperator(ref op) => {
                            base = Some(eval_init_expr(op)?)
                        }
                        ParserState::DataSectionEntryBodyChunk(chunk) => {
                            // A data segment may be delivered as several chunks; append
                            // each one instead of overwriting what was read so far.
                            data.extend_from_slice(chunk);
                        }
                        ParserState::BeginInitExpressionBody
                        | ParserState::EndInitExpressionBody => {}
                        ParserState::BeginDataSectionEntryBody(_)
                        | ParserState::EndDataSectionEntryBody => {}
                        ParserState::EndDataSectionEntry => break,
                        _ => unreachable!(),
                    }
                }

                let data_init = DataInitializer {
                    memory_index,
                    base: base.unwrap(),
                    data,
                };
                info.data_initializers.push(data_init);
            }
            ParserState::BeginGlobalSectionEntry(ty) => {
                // Only the first init-expression operator is evaluated; the states that
                // close the entry fall through to the catch-all arm of the outer loop.
                let init = loop {
                    let state = parser.read();
                    match *state {
                        ParserState::Error(err) => return Err(LoadError::Parse(err)),
                        ParserState::InitExpressionOperator(ref op) => {
                            break eval_init_expr(op)?;
                        }
                        ParserState::BeginInitExpressionBody => {}
                        _ => unreachable!(),
                    }
                };
                let desc = GlobalDescriptor {
                    mutable: ty.mutable,
                    ty: wp_type_to_type(ty.content_type)?,
                };

                let global_init = GlobalInit { desc, init };

                info.globals.push(global_init);
            }

            // Every other parser state carries nothing this loader records.
            _ => {}
        }
    }
}
/// Converts a wasmparser value type into the runtime's `Type`.
///
/// # Errors
///
/// Returns a `BinaryReaderError` for `V128`.
///
/// # Panics
///
/// Panics on any other wasmparser type that is not one of the four numeric types.
pub fn wp_type_to_type(ty: WpType) -> Result<Type, BinaryReaderError> {
    let converted = match ty {
        WpType::I32 => Type::I32,
        WpType::I64 => Type::I64,
        WpType::F32 => Type::F32,
        WpType::F64 => Type::F64,
        WpType::V128 => {
            let simd_unsupported = BinaryReaderError {
                message: "the wasmer llvm backend does not yet support the simd extension",
                // No meaningful offset is available here; same value as `-1isize as usize`.
                offset: ::std::usize::MAX,
            };
            return Err(simd_unsupported);
        }
        _ => panic!("broken invariant, invalid type"),
    };
    Ok(converted)
}
/// Converts the runtime's `Type` into the corresponding wasmparser value type.
pub fn type_to_wp_type(ty: Type) -> WpType {
    match ty {
        // Integer types.
        Type::I32 => WpType::I32,
        Type::I64 => WpType::I64,
        // Floating-point types.
        Type::F32 => WpType::F32,
        Type::F64 => WpType::F64,
    }
}
fn func_type_to_func_sig(func_ty: &FuncType) -> Result<FuncSig, BinaryReaderError> {
assert_eq!(func_ty.form, WpType::Func);
Ok(FuncSig::new(
func_ty
.params
.iter()
.cloned()
.map(wp_type_to_type)
.collect::<Result<Vec<_>, _>>()?,
func_ty
.returns
.iter()
.cloned()
.map(wp_type_to_type)
.collect::<Result<Vec<_>, _>>()?,
))
}
/// Turns a single init-expression operator into an `Initializer`.
///
/// Supports `GetGlobal` and the four numeric constant operators; any other operator
/// yields a `BinaryReaderError`.
fn eval_init_expr(op: &Operator) -> Result<Initializer, BinaryReaderError> {
    let constant = match *op {
        Operator::GetGlobal { global_index } => {
            let index = ImportedGlobalIndex::new(global_index as usize);
            return Ok(Initializer::GetGlobal(index));
        }
        Operator::I32Const { value } => Value::I32(value),
        Operator::I64Const { value } => Value::I64(value),
        // Float constants are rebuilt from their raw bit patterns.
        Operator::F32Const { value } => Value::F32(f32::from_bits(value.bits())),
        Operator::F64Const { value } => Value::F64(f64::from_bits(value.bits())),
        _ => {
            return Err(BinaryReaderError {
                message: "init expr evaluation failed: unsupported opcode",
                offset: -1isize as usize,
            });
        }
    };
    Ok(Initializer::Const(constant))
}